diff --git a/pkg/dash/app_html.py b/pkg/dash/app_html.py
index 8bbbab9..04333be 100644
--- a/pkg/dash/app_html.py
+++ b/pkg/dash/app_html.py
@@ -88,7 +88,7 @@ def layout_config(app):
}
),
html.A(
- href='https://x.com/elonmusk',
+ href='https://x.com/elonmusk/with_replies',
target='_blank',
children=[
html.Img(
@@ -193,7 +193,38 @@ def layout_config(app):
style={'width': '100%'}
)
)
- ])
+ ]),
+ html.Tr([
+ html.Td("Predict Tweets Start:", style={'paddingRight': '10px'}),
+ html.Td(
+ dcc.Input(
+ id='prob-start-input',
+ type='number',
+                            placeholder='Enter Probability Start value',
+ value=525,
+ style={'width': '100%'}
+ )
+ )
+ ]),
+ html.Tr([
+ html.Td("Predict Tweets End:", style={'paddingRight': '10px'}),
+ html.Td(
+ dcc.Input(
+ id='prob-end-input',
+ type='number',
+                            placeholder='Enter Probability End value',
+ value=549,
+ style={'width': '100%'}
+ )
+ )
+ ]),
+ html.Tr([
+ html.Td("Calculate Probability:", style={'paddingRight': '10px'}),
+ html.Td(
+ html.Button('Calculate', id='update-button', n_clicks=0)
+ )
+ ]),
+ html.Tr(id='manual-info-tooltip', style={'margin': '10px'})
], style={
'width': '50%',
'marginTop': '10px',
diff --git a/pkg/dash/func/info.py b/pkg/dash/func/info.py
index 72418fc..a17ece4 100644
--- a/pkg/dash/func/info.py
+++ b/pkg/dash/func/info.py
@@ -1,9 +1,7 @@
-import pytz
-from pkg.tool import get_tweets_since_last_friday, format_time_str, get_time_since_last_tweet, get_hourly_weighted_array
+from pkg.dash.func.info_func import *
+from pkg.tool import format_time_str, get_time_since_last_tweet
from pkg.dash.app_init import app
from dash.dependencies import Input, Output
-from datetime import timedelta
-from datetime import datetime
from dash import html
@@ -87,92 +85,3 @@ def update_info(n, target_value, increment_value, hour_value):
})
return [pace_table]
-def get_pace_params():
- est = pytz.timezone('US/Eastern')
- now = datetime.now(est)
- today = now.date()
- days_to_next_friday = (4 - today.weekday()) % 7
- next_friday = (now.replace(hour=12, minute=0, second=0, microsecond=0) +
- timedelta(days=days_to_next_friday))
- if now > next_friday:
- next_friday += timedelta(days=7)
- days_to_next_friday = (next_friday - now).total_seconds() / (24 * 60 * 60)
- tweet_count = get_tweets_since_last_friday()
- return tweet_count, days_to_next_friday
-
-
-def calculate_tweet_pace():
- tweet_count, days_to_next_friday = get_pace_params()
- pace = (tweet_count / (7 - days_to_next_friday)) * days_to_next_friday + tweet_count
- return round(pace, 6) if pace > 0 else float(tweet_count)
-
-
-def calculate_pace_decline_rate():
- tweet_count, days_to_next_friday = get_pace_params()
- T = 7
- decline_per_day = -(tweet_count * T) / ((T - days_to_next_friday) ** 2)
- decline_per_hour = decline_per_day / 24
- return round(decline_per_hour, 2)
-
-
-def calculate_pace_for_increment(increment, hours, tweet_count, days_to_next_friday, current_pace):
- future_days = days_to_next_friday - (hours / 24)
- new_tweet_count = tweet_count + increment
- if future_days <= 0:
- return round(new_tweet_count, 2)
- new_pace = (new_tweet_count / (7 - future_days)) * future_days + new_tweet_count
- return round(max(new_pace, new_tweet_count), 2)
-
-
-def calculate_pace_increase_in_hour(increment_value, hour_value):
- tweet_count, days_to_next_friday = get_pace_params()
- current_pace = (tweet_count / (7 - days_to_next_friday)) * days_to_next_friday + tweet_count
- increments = [0, 1, 5, 10, 20]
- pace_increases = {}
- for inc in increments:
- pace_increases[f'increase_{inc}'] = calculate_pace_for_increment(
- inc, 1, tweet_count, days_to_next_friday, current_pace
- )
- if increment_value is None or hour_value is None:
- pace_increases['custom_increment'] = None
- else:
- increment = int(increment_value)
- hours = int(hour_value)
- pace_increases['custom_increment'] = calculate_pace_for_increment(
- increment, hours, tweet_count, days_to_next_friday, current_pace
- )
- pace_increases['custom_increment_key'] = increment
- return pace_increases
-
-def calculate_avg_tweets_per_day(target, now, remain):
- Xi = get_hourly_weighted_array()
- if remain <= 0:
- return "remain<=0"
- if target <= now:
- return "Already reach"
-
- fx = max(remain - 12, 0)
-
- if remain > 12:
- fy = sum(Xi[0:12]) * 24
- else:
- full_hours = int(remain)
- fractional_hour = remain - full_hours
- if full_hours >= 24:
- full_hours = 23
- fractional_hour = 0
-
- if full_hours < 0:
- full_hours = 0
-
- if full_hours > 0:
- fy = sum(Xi[0:full_hours]) + Xi[full_hours] * fractional_hour
- else:
- fy = Xi[0] * fractional_hour
- fy *= 24
-
- if fx + fy == 0:
- return "fx + fy = 0"
-
- result = (target - now) / ((fx + fy) / 24)
- return round(result, 2)
\ No newline at end of file
diff --git a/pkg/dash/func/info_func.py b/pkg/dash/func/info_func.py
new file mode 100644
index 0000000..7e9f40a
--- /dev/null
+++ b/pkg/dash/func/info_func.py
@@ -0,0 +1,218 @@
+import pytz
+from pkg.tool import get_tweets_since_last_friday, aggregate_data
+import numpy as np
+from scipy.stats import norm
+from datetime import timedelta, datetime
+from pkg.config import render_data
+
+def get_last_7_days_data():
+ est = pytz.timezone('US/Eastern')
+ now = datetime.now(est).date()
+ last_7_days = [now - timedelta(days=i) for i in range(7)]
+ data = render_data.global_agg_df[
+ render_data.global_agg_df['date'].isin(last_7_days)].copy()
+ return data
+
+def get_hourly_weighted_array():
+ data = get_last_7_days_data()
+ if data.empty:
+ return [1 / 24] * 24
+
+ agg_data = aggregate_data(data, 60)
+ one_day_data = agg_data.groupby('interval_group')['tweet_count'].sum().reset_index()
+ tweet_count_total = one_day_data['tweet_count'].sum()
+
+ hourly_rates = [0] * 24
+ for _, row in one_day_data.iterrows():
+ minute = row['interval_group']
+ hour = int(minute // 60)
+ if hour < 24:
+ hourly_rates[hour] = row['tweet_count'] / tweet_count_total if tweet_count_total > 0 else 0
+
+ total_rate = sum(hourly_rates)
+ if total_rate > 0:
+ hourly_rates = [rate / total_rate for rate in hourly_rates]
+ else:
+ hourly_rates = [1 / 24] * 24
+ return hourly_rates
+
+def calculate_variance_factor():
+ data = get_last_7_days_data()
+ if data.empty or 'tweet_count' not in data.columns:
+ return 1.5
+
+ data['hour'] = data['minute_of_day'] // 60
+ hourly_data = data.groupby(['date', 'hour'])['tweet_count'].sum().reset_index()
+ hourly_stats = hourly_data.groupby('hour')['tweet_count'].agg(['mean', 'var']).reset_index()
+ variance_factors = hourly_stats['var'] / hourly_stats['mean']
+ return np.mean(variance_factors[variance_factors.notna()]) or 1.5
+
+def get_dynamic_hourly_weights():
+ data = get_last_7_days_data()
+ if data.empty:
+ return [1 / 24] * 24
+
+ weights = [0.2, 0.2, 0.3, 0.3, 0.5, 0.5, 0.5]
+ hourly_rates = [0] * 24
+
+ for day_idx, day in enumerate(get_last_7_days_data()['date'].unique()):
+ day_data = data[data['date'] == day].copy()
+ if day_data.empty:
+ continue
+ agg_data = aggregate_data(day_data, 60)
+ day_tweets = agg_data.groupby('interval_group')['tweet_count'].sum().reset_index()
+ day_total = day_tweets['tweet_count'].sum()
+ for _, row in day_tweets.iterrows():
+ minute = row['interval_group']
+ hour = int(minute // 60)
+ if hour < 24:
+ hourly_rates[hour] += (row['tweet_count'] / day_total if day_total > 0 else 0) * weights[day_idx % 7]
+
+ total_rate = sum(hourly_rates)
+ if total_rate > 0:
+ hourly_rates = [rate / total_rate for rate in hourly_rates]
+ else:
+ hourly_rates = [1 / 24] * 24
+ return hourly_rates
+
+def get_pace_params():
+ est = pytz.timezone('US/Eastern')
+ now = datetime.now(est)
+ today = now.date()
+ days_to_next_friday = (4 - today.weekday()) % 7
+ next_friday = (now.replace(hour=12, minute=0, second=0, microsecond=0) +
+ timedelta(days=days_to_next_friday))
+ if now > next_friday:
+ next_friday += timedelta(days=7)
+ days_to_next_friday = (next_friday - now).total_seconds() / (24 * 60 * 60)
+ tweet_count = get_tweets_since_last_friday()
+ return tweet_count, days_to_next_friday
+
+def calculate_tweet_pace():
+ tweet_count, days_to_next_friday = get_pace_params()
+ pace = (tweet_count / (7 - days_to_next_friday)) * days_to_next_friday + tweet_count
+ return round(pace, 6) if pace > 0 else float(tweet_count)
+
+def calculate_pace_decline_rate():
+ tweet_count, days_to_next_friday = get_pace_params()
+ T = 7
+ decline_per_day = -(tweet_count * T) / ((T - days_to_next_friday) ** 2)
+ decline_per_hour = decline_per_day / 24
+ return round(decline_per_hour, 2)
+
+def calculate_pace_for_increment(increment, hours, tweet_count, days_to_next_friday, current_pace):
+ future_days = days_to_next_friday - (hours / 24)
+ new_tweet_count = tweet_count + increment
+ if future_days <= 0:
+ return round(new_tweet_count, 2)
+ new_pace = (new_tweet_count / (7 - future_days)) * future_days + new_tweet_count
+ return round(max(new_pace, new_tweet_count), 2)
+
+def calculate_pace_increase_in_hour(increment_value, hour_value):
+ tweet_count, days_to_next_friday = get_pace_params()
+ current_pace = (tweet_count / (7 - days_to_next_friday)) * days_to_next_friday + tweet_count
+ increments = [0, 1, 5, 10, 20]
+ pace_increases = {}
+ for inc in increments:
+ pace_increases[f'increase_{inc}'] = calculate_pace_for_increment(
+ inc, 1, tweet_count, days_to_next_friday, current_pace
+ )
+ if increment_value is None or hour_value is None:
+ pace_increases['custom_increment'] = None
+ else:
+ increment = int(increment_value)
+ hours = int(hour_value)
+ pace_increases['custom_increment'] = calculate_pace_for_increment(
+ increment, hours, tweet_count, days_to_next_friday, current_pace
+ )
+ pace_increases['custom_increment_key'] = increment
+ return pace_increases
+
+def calculate_avg_tweets_per_day(target, now, remain):
+ Xi = get_hourly_weighted_array()
+ if remain <= 0:
+ return "remain<=0"
+ if target <= now:
+ return "Already reach"
+
+ fx = max(remain - 12, 0)
+
+ if remain > 12:
+ fy = sum(Xi[0:12]) * 24
+ else:
+ full_hours = int(remain)
+ fractional_hour = remain - full_hours
+ if full_hours >= 24:
+ full_hours = 23
+ fractional_hour = 0
+
+ if full_hours < 0:
+ full_hours = 0
+
+ if full_hours > 0:
+ fy = sum(Xi[0:full_hours]) + Xi[full_hours] * fractional_hour
+ else:
+ fy = Xi[0] * fractional_hour
+ fy *= 24
+
+ if fx + fy == 0:
+ return "fx + fy = 0"
+
+ result = (target - now) / ((fx + fy) / 24)
+ return round(result, 2)
+
+def calculate_tweet_probability(tweet_count, days_to_next_friday, prob_start, prob_end, peak_percentile=75):
+ remaining_hours = days_to_next_friday * 24
+ hourly_weights = get_dynamic_hourly_weights()
+
+ data = get_last_7_days_data()
+ if data.empty:
+ recent_tweets = [70] * 7
+ else:
+ agg_data = aggregate_data(data, 1440)
+ daily_tweets = agg_data.groupby('date')['tweet_count'].sum().reset_index()
+ recent_tweets = daily_tweets['tweet_count'].tolist()[-7:]
+ if len(recent_tweets) < 7:
+ recent_tweets = recent_tweets + [70] * (7 - len(recent_tweets))
+
+ recent_3_days = np.mean(recent_tweets[-3:])
+ past_4_days = np.mean(recent_tweets[:-3]) if len(recent_tweets) > 3 else 70
+ daily_avg = 0.8 * recent_3_days + 0.2 * past_4_days
+ daily_avg_std = np.std(recent_tweets) if len(recent_tweets) >= 7 else np.std([70] * 7)
+
+ peak_threshold = np.percentile(hourly_weights, peak_percentile)
+ segments = []
+ current_segment = []
+ for i in range(24):
+ if hourly_weights[i] >= peak_threshold:
+ current_segment.append(i)
+ elif current_segment:
+ segments.append(current_segment)
+ current_segment = []
+ if current_segment:
+ segments.append(current_segment)
+
+ lambda_remaining = 0
+ variance_factor = calculate_variance_factor()
+ total_weight = sum(hourly_weights)
+ for segment in segments:
+ hours_in_segment = len(segment) * (remaining_hours / 24)
+ segment_weight_avg = np.mean([hourly_weights[i] for i in segment])
+ lambda_segment = daily_avg * (hours_in_segment / remaining_hours) * (segment_weight_avg / (total_weight / 24))
+ lambda_remaining += lambda_segment
+
+ mu_low = (daily_avg - daily_avg_std) * (remaining_hours / 24)
+ mu_high = (daily_avg + daily_avg_std) * (remaining_hours / 24)
+ var_low = mu_low * variance_factor
+ var_high = mu_high * variance_factor
+ sigma_low = np.sqrt(var_low)
+ sigma_high = np.sqrt(var_high)
+
+ a = prob_start - tweet_count
+ b = prob_end - tweet_count
+ if a < 0:
+ return "0.0000 - 0.0000" if tweet_count > prob_end else "1.0000 - 1.0000"
+
+ prob_low = norm.cdf((b - mu_low) / sigma_low) - norm.cdf((a - mu_low) / sigma_low)
+ prob_high = norm.cdf((b - mu_high) / sigma_high) - norm.cdf((a - mu_high) / sigma_high)
+ return f"{prob_low:.4f} - {prob_high:.4f}"
\ No newline at end of file
diff --git a/pkg/dash/func/info_m.py b/pkg/dash/func/info_m.py
new file mode 100644
index 0000000..3c8b446
--- /dev/null
+++ b/pkg/dash/func/info_m.py
@@ -0,0 +1,37 @@
+from pkg.dash.func.info_func import *
+from pkg.dash.app_init import app
+from dash.dependencies import Input, Output
+from dash import html
+
+@app.callback(
+ [Output('manual-info-tooltip', 'children')],
+ [Input('update-button', 'n_clicks'),
+ Input('prob-start-input', 'value'),
+ Input('prob-end-input', 'value')]
+)
+def update_info_manual(n_clicks, prob_start, prob_end):
+ if n_clicks == 0:
+        return [html.Div("Click 'Calculate' to see results.")]
+
+ tweet_count, days_to_next_friday = get_pace_params()
+ prob_start = int(prob_start) if prob_start is not None else 525
+ prob_end = int(prob_end) if prob_end is not None else 549
+
+ probability = calculate_tweet_probability(tweet_count, days_to_next_friday, prob_start, prob_end)
+
+ prob_low, prob_high = map(float, probability.split(" - "))
+ formatted_probability = f"{prob_low * 100:.2f}% - {prob_high * 100:.2f}%"
+
+ pace_table_rows = [
+ html.Tr([
+ html.Th(f"Probability ({prob_start}-{prob_end})", colSpan=2, style={'paddingRight': '10px'}),
+ html.Td(formatted_probability, colSpan=6, style={'paddingRight': '10px'})
+ ])
+ ]
+ pace_table = html.Table(pace_table_rows, style={
+ 'width': '100%',
+ 'textAlign': 'left',
+ 'borderCollapse': 'collapse'
+ })
+ return [pace_table]
+
diff --git a/pkg/tool.py b/pkg/tool.py
index a7c92d6..5ab23b8 100644
--- a/pkg/tool.py
+++ b/pkg/tool.py
@@ -1,5 +1,6 @@
from datetime import datetime, timedelta
import pandas as pd
+import numpy as np
from pkg.config import render_data
import pytz
@@ -84,7 +85,7 @@ def get_time_since_last_tweet():
return 0.0
latest_tweet_time = df['datetime_est'].max()
time_diff = now_est - latest_tweet_time
- days_diff = time_diff.total_seconds() / (24 * 60 * 60) # 转换为天数
+ days_diff = time_diff.total_seconds() / (24 * 60 * 60)
return days_diff
def format_time_str(days_to_next_friday):
@@ -97,31 +98,3 @@ def format_time_str(days_to_next_friday):
return f"{days}d {hours:02d}h {minutes:02d}m {seconds:02d}s ({total_hours}h)"
-def get_hourly_weighted_array():
- est = pytz.timezone('US/Eastern')
- now = datetime.now(est).date()
- last_7_days = [now - timedelta(days=i) for i in range(7)]
-
- multi_data_agg = render_data.global_agg_df[
- render_data.global_agg_df['date'].isin(last_7_days)].copy()
-
- if multi_data_agg.empty:
- return [1 / 24] * 24
-
- agg_data = aggregate_data(multi_data_agg, 60)
- one_day_data = agg_data.groupby('interval_group')['tweet_count'].sum().reset_index()
- tweet_count_total = one_day_data['tweet_count'].sum()
-
- hourly_rates = [0] * 24
- for _, row in one_day_data.iterrows():
- minute = row['interval_group']
- hour = int(minute // 60)
- if hour < 24:
- hourly_rates[hour] = row['tweet_count'] / tweet_count_total if tweet_count_total > 0 else 0
-
- total_rate = sum(hourly_rates)
- if total_rate > 0:
- hourly_rates = [rate / total_rate for rate in hourly_rates]
- else:
- hourly_rates = [1 / 24] * 24
- return hourly_rates
\ No newline at end of file