import pytz
from pkg.tool import get_tweets_since_last_friday, aggregate_data
import numpy as np
from scipy.stats import norm
from datetime import timedelta, datetime
from pkg.config import render_data


def get_last_7_days_data():
    """Return rows of ``render_data.global_agg_df`` dated within the last 7 days.

    Dates are evaluated in US/Eastern, matching the cycle logic used
    throughout this module. A copy is returned so callers may mutate freely.
    """
    est = pytz.timezone('US/Eastern')
    today = datetime.now(est).date()
    window = [today - timedelta(days=i) for i in range(7)]
    df = render_data.global_agg_df
    return df[df['date'].isin(window)].copy()


def _normalize_hourly(hourly_rates):
    """Scale a 24-slot rate list to sum to 1; uniform fallback when all zero."""
    total = sum(hourly_rates)
    if total > 0:
        return [rate / total for rate in hourly_rates]
    return [1 / 24] * 24


def get_hourly_weighted_array():
    """Return a 24-element list of hourly activity weights summing to 1.

    Built from the last 7 days of data aggregated into 60-minute buckets;
    falls back to a uniform distribution when there is no recent data.
    """
    data = get_last_7_days_data()
    if data.empty:
        return [1 / 24] * 24

    agg_data = aggregate_data(data, 60)
    one_day = agg_data.groupby('interval_group')['tweet_count'].sum().reset_index()
    total = one_day['tweet_count'].sum()

    hourly_rates = [0] * 24
    for _, row in one_day.iterrows():
        hour = int(row['interval_group'] // 60)
        if hour < 24:
            hourly_rates[hour] = row['tweet_count'] / total if total > 0 else 0
    return _normalize_hourly(hourly_rates)


def calculate_variance_factor():
    """Return the mean variance-to-mean ratio (overdispersion) of hourly counts.

    Falls back to 1.5 when no usable data exists or the computed factor is
    NaN, infinite, or non-positive.

    Bug fixes vs. the original:
    - ``np.mean(...) or 1.5`` only caught a factor of exactly 0.0; NaN and
      inf are truthy and leaked through to callers. Now checked explicitly.
    - hours with a zero mean produced ``inf`` ratios that poisoned the mean;
      such hours are now excluded before dividing.
    """
    data = get_last_7_days_data()
    if data.empty or 'tweet_count' not in data.columns:
        return 1.5

    data['hour'] = data['minute_of_day'] // 60
    hourly = data.groupby(['date', 'hour'])['tweet_count'].sum().reset_index()
    stats = hourly.groupby('hour')['tweet_count'].agg(['mean', 'var']).reset_index()

    # Only hours with a positive mean give a meaningful (finite) ratio.
    valid = stats[stats['mean'] > 0]
    factors = (valid['var'] / valid['mean']).dropna()
    if factors.empty:
        return 1.5

    factor = float(factors.mean())
    if not np.isfinite(factor) or factor <= 0:
        return 1.5
    return factor


def get_dynamic_hourly_weights():
    """Return 24 hourly weights where recent days count more than older ones.

    Bug fix vs. the original: the day list was obtained by calling
    ``get_last_7_days_data()`` a second time, recomputing the frame (and
    risking inconsistency if the underlying data changed between calls).
    The already-fetched ``data`` is reused instead.
    """
    data = get_last_7_days_data()
    if data.empty:
        return [1 / 24] * 24

    # Per-day blend weights, oldest -> newest.
    day_weights = [0.2, 0.2, 0.3, 0.3, 0.5, 0.5, 0.5]
    hourly_rates = [0] * 24

    for day_idx, day in enumerate(data['date'].unique()):
        day_data = data[data['date'] == day].copy()
        if day_data.empty:
            continue
        agg_data = aggregate_data(day_data, 60)
        day_tweets = agg_data.groupby('interval_group')['tweet_count'].sum().reset_index()
        day_total = day_tweets['tweet_count'].sum()
        for _, row in day_tweets.iterrows():
            hour = int(row['interval_group'] // 60)
            if hour < 24:
                share = row['tweet_count'] / day_total if day_total > 0 else 0
                hourly_rates[hour] += share * day_weights[day_idx % 7]

    return _normalize_hourly(hourly_rates)


def get_pace_params():
    """Return ``(tweet_count, days_to_next_friday)`` for the current cycle.

    The cycle ends Friday 12:00 US/Eastern; ``days_to_next_friday`` is the
    fractional number of days remaining until that deadline.
    """
    est = pytz.timezone('US/Eastern')
    now = datetime.now(est)
    days_ahead = (4 - now.date().weekday()) % 7
    next_friday = (now.replace(hour=12, minute=0, second=0, microsecond=0)
                   + timedelta(days=days_ahead))
    if now > next_friday:
        # Already past this week's Friday noon; target the following one.
        next_friday += timedelta(days=7)
    days_to_next_friday = (next_friday - now).total_seconds() / (24 * 60 * 60)
    return get_tweets_since_last_friday(), days_to_next_friday


def calculate_tweet_pace():
    """Linearly extrapolate the end-of-cycle tweet total from the pace so far.

    Bug fix vs. the original: when the cycle has just (re)started,
    ``7 - days_to_next_friday`` is 0 and the original raised
    ``ZeroDivisionError``; with no elapsed time to extrapolate from we now
    return the raw count instead.
    """
    tweet_count, days_to_next_friday = get_pace_params()
    elapsed_days = 7 - days_to_next_friday
    if elapsed_days <= 0:
        return float(tweet_count)
    pace = (tweet_count / elapsed_days) * days_to_next_friday + tweet_count
    return round(pace, 6) if pace > 0 else float(tweet_count)


def calculate_pace_decline_rate():
    """Return d(pace)/dt in tweets per hour if no new tweets arrive (negative).

    Bug fix vs. the original: guards the ``(T - days)**2`` denominator
    against a cycle-boundary zero instead of raising ``ZeroDivisionError``.
    """
    tweet_count, days_to_next_friday = get_pace_params()
    T = 7
    elapsed_days = T - days_to_next_friday
    if elapsed_days <= 0:
        return 0.0
    decline_per_day = -(tweet_count * T) / (elapsed_days ** 2)
    return round(decline_per_day / 24, 2)


def calculate_pace_for_increment(increment, hours, tweet_count,
                                 days_to_next_friday, current_pace):
    """Project the pace after ``increment`` more tweets, ``hours`` from now.

    ``current_pace`` is accepted for interface compatibility but is not
    needed by the projection.  Returns the projected pace rounded to 2
    decimals, never less than the projected raw count.
    """
    future_days = days_to_next_friday - (hours / 24)
    new_tweet_count = tweet_count + increment
    if future_days <= 0:
        # Past the cycle end: the count is the final number.
        return round(new_tweet_count, 2)
    elapsed = 7 - future_days
    if elapsed <= 0:
        # No elapsed time to extrapolate from (cycle boundary).
        return round(new_tweet_count, 2)
    new_pace = (new_tweet_count / elapsed) * future_days + new_tweet_count
    return round(max(new_pace, new_tweet_count), 2)


def calculate_pace_increase_in_hour(increment_value, hour_value):
    """Return projected paces for standard increments plus an optional custom one.

    ``increment_value``/``hour_value`` may be None (no custom projection) or
    numeric strings/numbers.  Keys: ``increase_<n>`` for n in 0,1,5,10,20,
    ``custom_increment`` (projection or None), and ``custom_increment_key``
    (only present when a custom projection was computed).

    Bug fix vs. the original: the current-pace computation divided by
    ``7 - days_to_next_friday`` without a zero guard.
    """
    tweet_count, days_to_next_friday = get_pace_params()
    elapsed = 7 - days_to_next_friday
    current_pace = ((tweet_count / elapsed) * days_to_next_friday + tweet_count
                    if elapsed > 0 else float(tweet_count))

    pace_increases = {}
    for inc in (0, 1, 5, 10, 20):
        pace_increases[f'increase_{inc}'] = calculate_pace_for_increment(
            inc, 1, tweet_count, days_to_next_friday, current_pace
        )

    if increment_value is None or hour_value is None:
        pace_increases['custom_increment'] = None
    else:
        increment = int(increment_value)
        hours = int(hour_value)
        pace_increases['custom_increment'] = calculate_pace_for_increment(
            increment, hours, tweet_count, days_to_next_friday, current_pace
        )
        pace_increases['custom_increment_key'] = increment
    return pace_increases


def calculate_avg_tweets_per_day(target, now, remain):
    """Estimate the tweets/day rate needed to go from ``now`` to ``target``.

    ``remain`` is the number of hours left.  The remaining time is weighted
    by the observed hourly activity profile (``get_hourly_weighted_array``).
    Returns a message string on degenerate input, otherwise a rounded float.

    NOTE(review): the branch structure below is preserved exactly from the
    original — it treats >12h horizons with a fixed 12-hour profile window.
    """
    Xi = get_hourly_weighted_array()
    if remain <= 0:
        return "remain<=0"
    if target <= now:
        return "Already reach"

    fx = max(remain - 12, 0)
    if remain > 12:
        fy = sum(Xi[0:12]) * 24
    else:
        full_hours = int(remain)
        fractional_hour = remain - full_hours
        if full_hours >= 24:
            full_hours = 23
            fractional_hour = 0
        if full_hours < 0:
            full_hours = 0
        if full_hours > 0:
            fy = sum(Xi[0:full_hours]) + Xi[full_hours] * fractional_hour
        else:
            fy = Xi[0] * fractional_hour
        fy *= 24

    if fx + fy == 0:
        return "fx + fy = 0"
    result = (target - now) / ((fx + fy) / 24)
    return round(result, 2)


def _interval_prob(a, b, mu, sigma):
    """P(a <= X <= b) for X ~ Normal(mu, sigma), clamped to [0, 1].

    A degenerate (zero / non-finite) sigma collapses to a point mass at mu,
    avoiding the division-by-zero the original code hit.
    """
    if sigma <= 0 or not np.isfinite(sigma):
        return 1.0 if a <= mu <= b else 0.0
    p = norm.cdf((b - mu) / sigma) - norm.cdf((a - mu) / sigma)
    return max(0.0, min(1.0, float(p)))


def calculate_tweet_probability(tweet_count, days_to_next_friday,
                                prob_start, prob_end, peak_percentile=75):
    """Return "low - high" probability that the final count lands in
    ``[prob_start, prob_end]``, as a string with 4 decimals per bound.

    The final count is modeled as Normal with mean scaled from a weighted
    daily average (recent 3 days weighted 0.8, prior days 0.2) and variance
    inflated by the empirical overdispersion factor.

    ``peak_percentile`` is retained for interface compatibility; the
    original's peak-segment computation fed a value (``lambda_remaining``)
    that was never used, so that dead code has been removed.

    Bug fixes vs. the original:
    - ``daily_avg - daily_avg_std`` could be negative, making the variance
      negative and ``np.sqrt`` return NaN; the low mean is now clamped at 0.
    - zero sigma caused a division by zero in ``norm.cdf``; handled by
      ``_interval_prob``.
    """
    remaining_hours = days_to_next_friday * 24
    if remaining_hours <= 0:
        return "0.0000 - 0.0000"  # time is past the end of the cycle

    data = get_last_7_days_data()
    if data.empty:
        recent_tweets = [70] * 7
    else:
        agg_data = aggregate_data(data, 1440)
        daily = agg_data.groupby('date')['tweet_count'].sum().reset_index()
        recent_tweets = daily['tweet_count'].tolist()[-7:]
        if len(recent_tweets) < 7:
            # Pad missing days with the 70/day prior.
            recent_tweets = recent_tweets + [70] * (7 - len(recent_tweets))

    recent_3_days = np.mean(recent_tweets[-3:])
    past_4_days = np.mean(recent_tweets[:-3]) if len(recent_tweets) > 3 else 70
    daily_avg = 0.8 * recent_3_days + 0.2 * past_4_days
    # recent_tweets is always padded to length 7 above, so the original's
    # ``len >= 7`` guard (whose fallback np.std([70]*7) == 0) was dead.
    daily_avg_std = np.std(recent_tweets)

    variance_factor = calculate_variance_factor()
    scale = remaining_hours / 24
    # Clamp the low-scenario mean at 0: a negative mean would have produced
    # a negative variance and a NaN sigma.
    mu_low = max(daily_avg - daily_avg_std, 0.0) * scale
    mu_high = max(daily_avg + daily_avg_std, 0.0) * scale
    sigma_low = np.sqrt(mu_low * variance_factor)
    sigma_high = np.sqrt(mu_high * variance_factor)

    b = prob_end - tweet_count
    if b < 0:  # already past the top of the target interval
        return "0.0000 - 0.0000"
    a = max(prob_start - tweet_count, 0)

    prob_low = _interval_prob(a, b, mu_low, sigma_low)
    prob_high = _interval_prob(a, b, mu_high, sigma_high)
    return f"{min(prob_low, prob_high):.4f} - {max(prob_low, prob_high):.4f}"