From a8125246c2efe4dd05b45a49a0bebf82332db6c3 Mon Sep 17 00:00:00 2001 From: NY Date: Fri, 14 Mar 2025 15:08:29 +0800 Subject: [PATCH] +probability(unfinished) --- pkg/dash/app_html.py | 35 +++++- pkg/dash/func/info.py | 95 +--------------- pkg/dash/func/info_func.py | 218 +++++++++++++++++++++++++++++++++++++ pkg/dash/func/info_m.py | 37 +++++++ pkg/tool.py | 31 +----- 5 files changed, 292 insertions(+), 124 deletions(-) create mode 100644 pkg/dash/func/info_func.py create mode 100644 pkg/dash/func/info_m.py diff --git a/pkg/dash/app_html.py b/pkg/dash/app_html.py index 8bbbab9..04333be 100644 --- a/pkg/dash/app_html.py +++ b/pkg/dash/app_html.py @@ -88,7 +88,7 @@ def layout_config(app): } ), html.A( - href='https://x.com/elonmusk', + href='https://x.com/elonmusk/with_replies', target='_blank', children=[ html.Img( @@ -193,7 +193,38 @@ def layout_config(app): style={'width': '100%'} ) ) - ]) + ]), + html.Tr([ + html.Td("Predict Tweets Start:", style={'paddingRight': '10px'}), + html.Td( + dcc.Input( + id='prob-start-input', + type='number', + placeholder='输入 Probability Start 值', + value=525, + style={'width': '100%'} + ) + ) + ]), + html.Tr([ + html.Td("Predict Tweets End:", style={'paddingRight': '10px'}), + html.Td( + dcc.Input( + id='prob-end-input', + type='number', + placeholder='输入 Probability End 值', + value=549, + style={'width': '100%'} + ) + ) + ]), + html.Tr([ + html.Td("Calculate Probability:", style={'paddingRight': '10px'}), + html.Td( + html.Button('Calculate', id='update-button', n_clicks=0) + ) + ]), + html.Tr(id='manual-info-tooltip', style={'margin': '10px'}) ], style={ 'width': '50%', 'marginTop': '10px', diff --git a/pkg/dash/func/info.py b/pkg/dash/func/info.py index 72418fc..a17ece4 100644 --- a/pkg/dash/func/info.py +++ b/pkg/dash/func/info.py @@ -1,9 +1,7 @@ -import pytz -from pkg.tool import get_tweets_since_last_friday, format_time_str, get_time_since_last_tweet, get_hourly_weighted_array +from pkg.dash.func.info_func import * +from 
pkg.tool import format_time_str, get_time_since_last_tweet from pkg.dash.app_init import app from dash.dependencies import Input, Output -from datetime import timedelta -from datetime import datetime from dash import html @@ -87,92 +85,3 @@ def update_info(n, target_value, increment_value, hour_value): }) return [pace_table] -def get_pace_params(): - est = pytz.timezone('US/Eastern') - now = datetime.now(est) - today = now.date() - days_to_next_friday = (4 - today.weekday()) % 7 - next_friday = (now.replace(hour=12, minute=0, second=0, microsecond=0) + - timedelta(days=days_to_next_friday)) - if now > next_friday: - next_friday += timedelta(days=7) - days_to_next_friday = (next_friday - now).total_seconds() / (24 * 60 * 60) - tweet_count = get_tweets_since_last_friday() - return tweet_count, days_to_next_friday - - -def calculate_tweet_pace(): - tweet_count, days_to_next_friday = get_pace_params() - pace = (tweet_count / (7 - days_to_next_friday)) * days_to_next_friday + tweet_count - return round(pace, 6) if pace > 0 else float(tweet_count) - - -def calculate_pace_decline_rate(): - tweet_count, days_to_next_friday = get_pace_params() - T = 7 - decline_per_day = -(tweet_count * T) / ((T - days_to_next_friday) ** 2) - decline_per_hour = decline_per_day / 24 - return round(decline_per_hour, 2) - - -def calculate_pace_for_increment(increment, hours, tweet_count, days_to_next_friday, current_pace): - future_days = days_to_next_friday - (hours / 24) - new_tweet_count = tweet_count + increment - if future_days <= 0: - return round(new_tweet_count, 2) - new_pace = (new_tweet_count / (7 - future_days)) * future_days + new_tweet_count - return round(max(new_pace, new_tweet_count), 2) - - -def calculate_pace_increase_in_hour(increment_value, hour_value): - tweet_count, days_to_next_friday = get_pace_params() - current_pace = (tweet_count / (7 - days_to_next_friday)) * days_to_next_friday + tweet_count - increments = [0, 1, 5, 10, 20] - pace_increases = {} - for inc in 
# ============================================================================
# Reconstruction of the two Python modules introduced by this patch.
# (The surrounding git-diff framing was newline-mangled; the deletion hunks in
# pkg/dash/func/info.py and pkg/tool.py removed the OLD copies of the same
# functions, which now live here.)
# ============================================================================

# ---- pkg/dash/func/info_func.py (new file) ---------------------------------
import pytz
from pkg.tool import get_tweets_since_last_friday, aggregate_data
import numpy as np
from scipy.stats import norm
from datetime import timedelta, datetime
from pkg.config import render_data


def get_last_7_days_data():
    """Return a copy of the rows of the global aggregate frame whose ``date``
    falls within the last 7 calendar days (US/Eastern)."""
    est = pytz.timezone('US/Eastern')
    today = datetime.now(est).date()
    last_7_days = [today - timedelta(days=i) for i in range(7)]
    return render_data.global_agg_df[
        render_data.global_agg_df['date'].isin(last_7_days)].copy()


def get_hourly_weighted_array():
    """Return a 24-entry list of per-hour tweet-rate weights summing to 1.

    Built from the last 7 days of data aggregated into 60-minute buckets;
    falls back to a uniform 1/24 distribution when no data is available.
    """
    data = get_last_7_days_data()
    if data.empty:
        return [1 / 24] * 24

    agg_data = aggregate_data(data, 60)
    one_day_data = agg_data.groupby('interval_group')['tweet_count'].sum().reset_index()
    tweet_count_total = one_day_data['tweet_count'].sum()

    hourly_rates = [0] * 24
    for _, row in one_day_data.iterrows():
        # interval_group is a minute-of-day bucket start; map it to an hour.
        hour = int(row['interval_group'] // 60)
        if hour < 24:
            hourly_rates[hour] = row['tweet_count'] / tweet_count_total if tweet_count_total > 0 else 0

    total_rate = sum(hourly_rates)
    if total_rate > 0:
        return [rate / total_rate for rate in hourly_rates]
    return [1 / 24] * 24


def calculate_variance_factor():
    """Estimate the variance/mean (overdispersion) factor of hourly tweet counts.

    Returns 1.5 as a conservative default when there is no usable data.
    """
    data = get_last_7_days_data()
    if data.empty or 'tweet_count' not in data.columns:
        return 1.5

    data['hour'] = data['minute_of_day'] // 60
    hourly_data = data.groupby(['date', 'hour'])['tweet_count'].sum().reset_index()
    hourly_stats = hourly_data.groupby('hour')['tweet_count'].agg(['mean', 'var']).reset_index()
    variance_factors = hourly_stats['var'] / hourly_stats['mean']
    # BUG FIX: the original `np.mean(...) or 1.5` returned NaN when every
    # factor was NaN (NaN is truthy, so the `or` fallback never fired) and let
    # inf through when a zero mean appeared. Keep only finite factors.
    valid = variance_factors[np.isfinite(variance_factors)]
    return float(valid.mean()) if not valid.empty else 1.5


def get_dynamic_hourly_weights():
    """Return 24 recency-weighted hourly weights (recent days count more)."""
    data = get_last_7_days_data()
    if data.empty:
        return [1 / 24] * 24

    # Per-day blend weights, oldest-to-newest as the days happen to enumerate.
    weights = [0.2, 0.2, 0.3, 0.3, 0.5, 0.5, 0.5]
    hourly_rates = [0] * 24

    # FIX: reuse the frame already fetched instead of re-querying
    # get_last_7_days_data() just to enumerate its days.
    for day_idx, day in enumerate(data['date'].unique()):
        day_data = data[data['date'] == day].copy()
        if day_data.empty:
            continue
        agg_data = aggregate_data(day_data, 60)
        day_tweets = agg_data.groupby('interval_group')['tweet_count'].sum().reset_index()
        day_total = day_tweets['tweet_count'].sum()
        for _, row in day_tweets.iterrows():
            hour = int(row['interval_group'] // 60)
            if hour < 24:
                hourly_rates[hour] += (row['tweet_count'] / day_total if day_total > 0 else 0) * weights[day_idx % 7]

    total_rate = sum(hourly_rates)
    if total_rate > 0:
        return [rate / total_rate for rate in hourly_rates]
    return [1 / 24] * 24


def get_pace_params():
    """Return ``(tweet_count, days_to_next_friday)``: tweets since last Friday
    and fractional days remaining until next Friday 12:00 US/Eastern."""
    est = pytz.timezone('US/Eastern')
    now = datetime.now(est)
    today = now.date()
    days_to_next_friday = (4 - today.weekday()) % 7
    next_friday = (now.replace(hour=12, minute=0, second=0, microsecond=0) +
                   timedelta(days=days_to_next_friday))
    if now > next_friday:
        next_friday += timedelta(days=7)
    days_to_next_friday = (next_friday - now).total_seconds() / (24 * 60 * 60)
    tweet_count = get_tweets_since_last_friday()
    return tweet_count, days_to_next_friday


def calculate_tweet_pace():
    """Project the end-of-week tweet count by linear extrapolation of the
    current rate over the remaining time."""
    tweet_count, days_to_next_friday = get_pace_params()
    pace = (tweet_count / (7 - days_to_next_friday)) * days_to_next_friday + tweet_count
    return round(pace, 6) if pace > 0 else float(tweet_count)


def calculate_pace_decline_rate():
    """Return d(pace)/dt in tweets per hour if no new tweets arrive (negative)."""
    tweet_count, days_to_next_friday = get_pace_params()
    T = 7  # length of the pace window in days
    decline_per_day = -(tweet_count * T) / ((T - days_to_next_friday) ** 2)
    return round(decline_per_day / 24, 2)


def calculate_pace_for_increment(increment, hours, tweet_count, days_to_next_friday, current_pace):
    """Return the projected pace after ``increment`` extra tweets arrive
    within the next ``hours`` hours.

    ``current_pace`` is accepted for interface compatibility but the pace is
    recomputed from the shifted clock. Never projects below the raw count.
    """
    future_days = days_to_next_friday - (hours / 24)
    new_tweet_count = tweet_count + increment
    if future_days <= 0:
        # Window already over: the pace IS the count.
        return round(new_tweet_count, 2)
    new_pace = (new_tweet_count / (7 - future_days)) * future_days + new_tweet_count
    return round(max(new_pace, new_tweet_count), 2)


def calculate_pace_increase_in_hour(increment_value, hour_value):
    """Return a dict of projected paces for fixed increments (+0/1/5/10/20 in
    the next hour) plus an optional user-supplied (increment, hours) pair."""
    tweet_count, days_to_next_friday = get_pace_params()
    current_pace = (tweet_count / (7 - days_to_next_friday)) * days_to_next_friday + tweet_count
    pace_increases = {}
    for inc in [0, 1, 5, 10, 20]:
        pace_increases[f'increase_{inc}'] = calculate_pace_for_increment(
            inc, 1, tweet_count, days_to_next_friday, current_pace
        )
    if increment_value is None or hour_value is None:
        pace_increases['custom_increment'] = None
    else:
        increment = int(increment_value)
        hours = int(hour_value)
        pace_increases['custom_increment'] = calculate_pace_for_increment(
            increment, hours, tweet_count, days_to_next_friday, current_pace
        )
        pace_increases['custom_increment_key'] = increment
    return pace_increases


def calculate_avg_tweets_per_day(target, now, remain):
    """Return the average tweets/day needed to go from ``now`` to ``target``
    tweets in ``remain`` hours, weighting hours by the historical hourly
    activity profile. Returns an explanatory string on degenerate input."""
    Xi = get_hourly_weighted_array()
    if remain <= 0:
        return "remain<=0"
    if target <= now:
        return "Already reach"

    # fx: whole "flat" hours beyond the profiled 12-hour horizon.
    fx = max(remain - 12, 0)

    if remain > 12:
        fy = sum(Xi[0:12]) * 24
    else:
        full_hours = int(remain)
        fractional_hour = remain - full_hours
        if full_hours >= 24:
            full_hours = 23
            fractional_hour = 0
        if full_hours < 0:
            full_hours = 0
        if full_hours > 0:
            fy = sum(Xi[0:full_hours]) + Xi[full_hours] * fractional_hour
        else:
            fy = Xi[0] * fractional_hour
        fy *= 24

    if fx + fy == 0:
        return "fx + fy = 0"

    return round((target - now) / ((fx + fy) / 24), 2)


def calculate_tweet_probability(tweet_count, days_to_next_friday, prob_start, prob_end, peak_percentile=75):
    """Estimate P(prob_start <= final weekly count <= prob_end) under a normal
    approximation, returning a ``"low - high"`` string for the two scenarios
    (daily average minus/plus one standard deviation)."""
    remaining_hours = days_to_next_friday * 24
    hourly_weights = get_dynamic_hourly_weights()

    data = get_last_7_days_data()
    if data.empty:
        recent_tweets = [70] * 7  # neutral prior when no history exists
    else:
        agg_data = aggregate_data(data, 1440)
        daily_tweets = agg_data.groupby('date')['tweet_count'].sum().reset_index()
        recent_tweets = daily_tweets['tweet_count'].tolist()[-7:]
        if len(recent_tweets) < 7:
            recent_tweets = recent_tweets + [70] * (7 - len(recent_tweets))

    # Blend: last 3 days dominate (80/20) the daily-average estimate.
    recent_3_days = np.mean(recent_tweets[-3:])
    past_4_days = np.mean(recent_tweets[:-3]) if len(recent_tweets) > 3 else 70
    daily_avg = 0.8 * recent_3_days + 0.2 * past_4_days
    daily_avg_std = np.std(recent_tweets) if len(recent_tweets) >= 7 else np.std([70] * 7)

    # Peak-hour segmentation feeds lambda_remaining, which is computed but not
    # yet consumed -- the patch subject marks this feature "(unfinished)".
    peak_threshold = np.percentile(hourly_weights, peak_percentile)
    segments = []
    current_segment = []
    for i in range(24):
        if hourly_weights[i] >= peak_threshold:
            current_segment.append(i)
        elif current_segment:
            segments.append(current_segment)
            current_segment = []
    if current_segment:
        segments.append(current_segment)

    lambda_remaining = 0
    variance_factor = calculate_variance_factor()
    total_weight = sum(hourly_weights)
    for segment in segments:
        hours_in_segment = len(segment) * (remaining_hours / 24)
        segment_weight_avg = np.mean([hourly_weights[i] for i in segment])
        lambda_segment = daily_avg * (hours_in_segment / remaining_hours) * (segment_weight_avg / (total_weight / 24))
        lambda_remaining += lambda_segment

    mu_low = (daily_avg - daily_avg_std) * (remaining_hours / 24)
    mu_high = (daily_avg + daily_avg_std) * (remaining_hours / 24)
    # BUG FIX: mu_low is negative whenever std > avg, which made the variance
    # negative and np.sqrt return NaN (and a zero sigma divided by zero).
    # Floor both variances at a small positive epsilon.
    sigma_low = np.sqrt(max(mu_low * variance_factor, 1e-6))
    sigma_high = np.sqrt(max(mu_high * variance_factor, 1e-6))

    b = prob_end - tweet_count
    if b < 0:
        # The count already exceeds the top of the range: probability 0.
        return "0.0000 - 0.0000"
    # BUG FIX: remaining tweets cannot be negative, so when the range has
    # already been entered clamp the lower integration bound to 0 instead of
    # returning a hard-coded "1.0000 - 1.0000".
    a = max(prob_start - tweet_count, 0.0)

    prob_low = norm.cdf((b - mu_low) / sigma_low) - norm.cdf((a - mu_low) / sigma_low)
    prob_high = norm.cdf((b - mu_high) / sigma_high) - norm.cdf((a - mu_high) / sigma_high)
    return f"{prob_low:.4f} - {prob_high:.4f}"


# ---- pkg/dash/func/info_m.py (new file) ------------------------------------
from pkg.dash.func.info_func import *
from pkg.dash.app_init import app
from dash.dependencies import Input, Output
from dash import html


@app.callback(
    [Output('manual-info-tooltip', 'children')],
    [Input('update-button', 'n_clicks'),
     Input('prob-start-input', 'value'),
     Input('prob-end-input', 'value')]
)
def update_info_manual(n_clicks, prob_start, prob_end):
    """Render the probability row for the user-selected [prob_start, prob_end]
    tweet range once the Calculate button has been clicked."""
    if n_clicks == 0:
        return [html.Div("Click 'Manual Update' to see results.")]

    tweet_count, days_to_next_friday = get_pace_params()
    prob_start = int(prob_start) if prob_start is not None else 525
    prob_end = int(prob_end) if prob_end is not None else 549

    probability = calculate_tweet_probability(tweet_count, days_to_next_friday, prob_start, prob_end)

    # calculate_tweet_probability always returns the "low - high" string form.
    prob_low, prob_high = map(float, probability.split(" - "))
    formatted_probability = f"{prob_low * 100:.2f}% - {prob_high * 100:.2f}%"

    pace_table_rows = [
        html.Tr([
            html.Th(f"Probability ({prob_start}-{prob_end})", colSpan=2, style={'paddingRight': '10px'}),
            html.Td(formatted_probability, colSpan=6, style={'paddingRight': '10px'})
        ])
    ]
    pace_table = html.Table(pace_table_rows, style={
        'width': '100%',
        'textAlign': 'left',
        'borderCollapse': 'collapse'
    })
    return [pace_table]