From a8125246c2efe4dd05b45a49a0bebf82332db6c3 Mon Sep 17 00:00:00 2001 From: NY Date: Fri, 14 Mar 2025 15:08:29 +0800 Subject: [PATCH] +probability(unfinished) --- pkg/dash/app_html.py | 35 +++++- pkg/dash/func/info.py | 95 +--------------- pkg/dash/func/info_func.py | 218 +++++++++++++++++++++++++++++++++++++ pkg/dash/func/info_m.py | 37 +++++++ pkg/tool.py | 31 +----- 5 files changed, 292 insertions(+), 124 deletions(-) create mode 100644 pkg/dash/func/info_func.py create mode 100644 pkg/dash/func/info_m.py diff --git a/pkg/dash/app_html.py b/pkg/dash/app_html.py index 8bbbab9..04333be 100644 --- a/pkg/dash/app_html.py +++ b/pkg/dash/app_html.py @@ -88,7 +88,7 @@ def layout_config(app): } ), html.A( - href='https://x.com/elonmusk', + href='https://x.com/elonmusk/with_replies', target='_blank', children=[ html.Img( @@ -193,7 +193,38 @@ def layout_config(app): style={'width': '100%'} ) ) - ]) + ]), + html.Tr([ + html.Td("Predict Tweets Start:", style={'paddingRight': '10px'}), + html.Td( + dcc.Input( + id='prob-start-input', + type='number', + placeholder='输入 Probability Start 值', + value=525, + style={'width': '100%'} + ) + ) + ]), + html.Tr([ + html.Td("Predict Tweets End:", style={'paddingRight': '10px'}), + html.Td( + dcc.Input( + id='prob-end-input', + type='number', + placeholder='输入 Probability End 值', + value=549, + style={'width': '100%'} + ) + ) + ]), + html.Tr([ + html.Td("Calculate Probability:", style={'paddingRight': '10px'}), + html.Td( + html.Button('Calculate', id='update-button', n_clicks=0) + ) + ]), + html.Tr(id='manual-info-tooltip', style={'margin': '10px'}) ], style={ 'width': '50%', 'marginTop': '10px', diff --git a/pkg/dash/func/info.py b/pkg/dash/func/info.py index 72418fc..a17ece4 100644 --- a/pkg/dash/func/info.py +++ b/pkg/dash/func/info.py @@ -1,9 +1,7 @@ -import pytz -from pkg.tool import get_tweets_since_last_friday, format_time_str, get_time_since_last_tweet, get_hourly_weighted_array +from pkg.dash.func.info_func import * +from 
pkg.tool import format_time_str, get_time_since_last_tweet from pkg.dash.app_init import app from dash.dependencies import Input, Output -from datetime import timedelta -from datetime import datetime from dash import html @@ -87,92 +85,3 @@ def update_info(n, target_value, increment_value, hour_value): }) return [pace_table] -def get_pace_params(): - est = pytz.timezone('US/Eastern') - now = datetime.now(est) - today = now.date() - days_to_next_friday = (4 - today.weekday()) % 7 - next_friday = (now.replace(hour=12, minute=0, second=0, microsecond=0) + - timedelta(days=days_to_next_friday)) - if now > next_friday: - next_friday += timedelta(days=7) - days_to_next_friday = (next_friday - now).total_seconds() / (24 * 60 * 60) - tweet_count = get_tweets_since_last_friday() - return tweet_count, days_to_next_friday - - -def calculate_tweet_pace(): - tweet_count, days_to_next_friday = get_pace_params() - pace = (tweet_count / (7 - days_to_next_friday)) * days_to_next_friday + tweet_count - return round(pace, 6) if pace > 0 else float(tweet_count) - - -def calculate_pace_decline_rate(): - tweet_count, days_to_next_friday = get_pace_params() - T = 7 - decline_per_day = -(tweet_count * T) / ((T - days_to_next_friday) ** 2) - decline_per_hour = decline_per_day / 24 - return round(decline_per_hour, 2) - - -def calculate_pace_for_increment(increment, hours, tweet_count, days_to_next_friday, current_pace): - future_days = days_to_next_friday - (hours / 24) - new_tweet_count = tweet_count + increment - if future_days <= 0: - return round(new_tweet_count, 2) - new_pace = (new_tweet_count / (7 - future_days)) * future_days + new_tweet_count - return round(max(new_pace, new_tweet_count), 2) - - -def calculate_pace_increase_in_hour(increment_value, hour_value): - tweet_count, days_to_next_friday = get_pace_params() - current_pace = (tweet_count / (7 - days_to_next_friday)) * days_to_next_friday + tweet_count - increments = [0, 1, 5, 10, 20] - pace_increases = {} - for inc in 
# ============================================================================
# Reconstruction of the two Python modules introduced by this patch.
# (The surrounding git-diff framing was newline-mangled; the deletion hunks in
# pkg/dash/func/info.py and pkg/tool.py removed the OLD copies of the same
# functions, which now live here.)
# ============================================================================

# ---- pkg/dash/func/info_func.py (new file) ---------------------------------
import pytz
from pkg.tool import get_tweets_since_last_friday, aggregate_data
import numpy as np
from scipy.stats import norm
from datetime import timedelta, datetime
from pkg.config import render_data


def get_last_7_days_data():
    """Return a copy of the rows of the global aggregate frame whose ``date``
    falls within the last 7 calendar days (US/Eastern)."""
    est = pytz.timezone('US/Eastern')
    today = datetime.now(est).date()
    last_7_days = [today - timedelta(days=i) for i in range(7)]
    return render_data.global_agg_df[
        render_data.global_agg_df['date'].isin(last_7_days)].copy()


def get_hourly_weighted_array():
    """Return a 24-entry list of per-hour tweet-rate weights summing to 1.

    Built from the last 7 days of data aggregated into 60-minute buckets;
    falls back to a uniform 1/24 distribution when no data is available.
    """
    data = get_last_7_days_data()
    if data.empty:
        return [1 / 24] * 24

    agg_data = aggregate_data(data, 60)
    one_day_data = agg_data.groupby('interval_group')['tweet_count'].sum().reset_index()
    tweet_count_total = one_day_data['tweet_count'].sum()

    hourly_rates = [0] * 24
    for _, row in one_day_data.iterrows():
        # interval_group is a minute-of-day bucket start; map it to an hour.
        hour = int(row['interval_group'] // 60)
        if hour < 24:
            hourly_rates[hour] = row['tweet_count'] / tweet_count_total if tweet_count_total > 0 else 0

    total_rate = sum(hourly_rates)
    if total_rate > 0:
        return [rate / total_rate for rate in hourly_rates]
    return [1 / 24] * 24


def calculate_variance_factor():
    """Estimate the variance/mean (overdispersion) factor of hourly tweet counts.

    Returns 1.5 as a conservative default when there is no usable data.
    """
    data = get_last_7_days_data()
    if data.empty or 'tweet_count' not in data.columns:
        return 1.5

    data['hour'] = data['minute_of_day'] // 60
    hourly_data = data.groupby(['date', 'hour'])['tweet_count'].sum().reset_index()
    hourly_stats = hourly_data.groupby('hour')['tweet_count'].agg(['mean', 'var']).reset_index()
    variance_factors = hourly_stats['var'] / hourly_stats['mean']
    # BUG FIX: the original `np.mean(...) or 1.5` returned NaN when every
    # factor was NaN (NaN is truthy, so the `or` fallback never fired) and let
    # inf through when a zero mean appeared. Keep only finite factors.
    valid = variance_factors[np.isfinite(variance_factors)]
    return float(valid.mean()) if not valid.empty else 1.5


def get_dynamic_hourly_weights():
    """Return 24 recency-weighted hourly weights (recent days count more)."""
    data = get_last_7_days_data()
    if data.empty:
        return [1 / 24] * 24

    # Per-day blend weights, oldest-to-newest as the days happen to enumerate.
    weights = [0.2, 0.2, 0.3, 0.3, 0.5, 0.5, 0.5]
    hourly_rates = [0] * 24

    # FIX: reuse the frame already fetched instead of re-querying
    # get_last_7_days_data() just to enumerate its days.
    for day_idx, day in enumerate(data['date'].unique()):
        day_data = data[data['date'] == day].copy()
        if day_data.empty:
            continue
        agg_data = aggregate_data(day_data, 60)
        day_tweets = agg_data.groupby('interval_group')['tweet_count'].sum().reset_index()
        day_total = day_tweets['tweet_count'].sum()
        for _, row in day_tweets.iterrows():
            hour = int(row['interval_group'] // 60)
            if hour < 24:
                hourly_rates[hour] += (row['tweet_count'] / day_total if day_total > 0 else 0) * weights[day_idx % 7]

    total_rate = sum(hourly_rates)
    if total_rate > 0:
        return [rate / total_rate for rate in hourly_rates]
    return [1 / 24] * 24


def get_pace_params():
    """Return ``(tweet_count, days_to_next_friday)``: tweets since last Friday
    and fractional days remaining until next Friday 12:00 US/Eastern."""
    est = pytz.timezone('US/Eastern')
    now = datetime.now(est)
    today = now.date()
    days_to_next_friday = (4 - today.weekday()) % 7
    next_friday = (now.replace(hour=12, minute=0, second=0, microsecond=0) +
                   timedelta(days=days_to_next_friday))
    if now > next_friday:
        next_friday += timedelta(days=7)
    days_to_next_friday = (next_friday - now).total_seconds() / (24 * 60 * 60)
    tweet_count = get_tweets_since_last_friday()
    return tweet_count, days_to_next_friday


def calculate_tweet_pace():
    """Project the end-of-week tweet count by linear extrapolation of the
    current rate over the remaining time."""
    tweet_count, days_to_next_friday = get_pace_params()
    pace = (tweet_count / (7 - days_to_next_friday)) * days_to_next_friday + tweet_count
    return round(pace, 6) if pace > 0 else float(tweet_count)


def calculate_pace_decline_rate():
    """Return d(pace)/dt in tweets per hour if no new tweets arrive (negative)."""
    tweet_count, days_to_next_friday = get_pace_params()
    T = 7  # length of the pace window in days
    decline_per_day = -(tweet_count * T) / ((T - days_to_next_friday) ** 2)
    return round(decline_per_day / 24, 2)


def calculate_pace_for_increment(increment, hours, tweet_count, days_to_next_friday, current_pace):
    """Return the projected pace after ``increment`` extra tweets arrive
    within the next ``hours`` hours.

    ``current_pace`` is accepted for interface compatibility but the pace is
    recomputed from the shifted clock. Never projects below the raw count.
    """
    future_days = days_to_next_friday - (hours / 24)
    new_tweet_count = tweet_count + increment
    if future_days <= 0:
        # Window already over: the pace IS the count.
        return round(new_tweet_count, 2)
    new_pace = (new_tweet_count / (7 - future_days)) * future_days + new_tweet_count
    return round(max(new_pace, new_tweet_count), 2)


def calculate_pace_increase_in_hour(increment_value, hour_value):
    """Return a dict of projected paces for fixed increments (+0/1/5/10/20 in
    the next hour) plus an optional user-supplied (increment, hours) pair."""
    tweet_count, days_to_next_friday = get_pace_params()
    current_pace = (tweet_count / (7 - days_to_next_friday)) * days_to_next_friday + tweet_count
    pace_increases = {}
    for inc in [0, 1, 5, 10, 20]:
        pace_increases[f'increase_{inc}'] = calculate_pace_for_increment(
            inc, 1, tweet_count, days_to_next_friday, current_pace
        )
    if increment_value is None or hour_value is None:
        pace_increases['custom_increment'] = None
    else:
        increment = int(increment_value)
        hours = int(hour_value)
        pace_increases['custom_increment'] = calculate_pace_for_increment(
            increment, hours, tweet_count, days_to_next_friday, current_pace
        )
        pace_increases['custom_increment_key'] = increment
    return pace_increases


def calculate_avg_tweets_per_day(target, now, remain):
    """Return the average tweets/day needed to go from ``now`` to ``target``
    tweets in ``remain`` hours, weighting hours by the historical hourly
    activity profile. Returns an explanatory string on degenerate input."""
    Xi = get_hourly_weighted_array()
    if remain <= 0:
        return "remain<=0"
    if target <= now:
        return "Already reach"

    # fx: whole "flat" hours beyond the profiled 12-hour horizon.
    fx = max(remain - 12, 0)

    if remain > 12:
        fy = sum(Xi[0:12]) * 24
    else:
        full_hours = int(remain)
        fractional_hour = remain - full_hours
        if full_hours >= 24:
            full_hours = 23
            fractional_hour = 0
        if full_hours < 0:
            full_hours = 0
        if full_hours > 0:
            fy = sum(Xi[0:full_hours]) + Xi[full_hours] * fractional_hour
        else:
            fy = Xi[0] * fractional_hour
        fy *= 24

    if fx + fy == 0:
        return "fx + fy = 0"

    return round((target - now) / ((fx + fy) / 24), 2)


def calculate_tweet_probability(tweet_count, days_to_next_friday, prob_start, prob_end, peak_percentile=75):
    """Estimate P(prob_start <= final weekly count <= prob_end) under a normal
    approximation, returning a ``"low - high"`` string for the two scenarios
    (daily average minus/plus one standard deviation)."""
    remaining_hours = days_to_next_friday * 24
    hourly_weights = get_dynamic_hourly_weights()

    data = get_last_7_days_data()
    if data.empty:
        recent_tweets = [70] * 7  # neutral prior when no history exists
    else:
        agg_data = aggregate_data(data, 1440)
        daily_tweets = agg_data.groupby('date')['tweet_count'].sum().reset_index()
        recent_tweets = daily_tweets['tweet_count'].tolist()[-7:]
        if len(recent_tweets) < 7:
            recent_tweets = recent_tweets + [70] * (7 - len(recent_tweets))

    # Blend: last 3 days dominate (80/20) the daily-average estimate.
    recent_3_days = np.mean(recent_tweets[-3:])
    past_4_days = np.mean(recent_tweets[:-3]) if len(recent_tweets) > 3 else 70
    daily_avg = 0.8 * recent_3_days + 0.2 * past_4_days
    daily_avg_std = np.std(recent_tweets) if len(recent_tweets) >= 7 else np.std([70] * 7)

    # Peak-hour segmentation feeds lambda_remaining, which is computed but not
    # yet consumed -- the patch subject marks this feature "(unfinished)".
    peak_threshold = np.percentile(hourly_weights, peak_percentile)
    segments = []
    current_segment = []
    for i in range(24):
        if hourly_weights[i] >= peak_threshold:
            current_segment.append(i)
        elif current_segment:
            segments.append(current_segment)
            current_segment = []
    if current_segment:
        segments.append(current_segment)

    lambda_remaining = 0
    variance_factor = calculate_variance_factor()
    total_weight = sum(hourly_weights)
    for segment in segments:
        hours_in_segment = len(segment) * (remaining_hours / 24)
        segment_weight_avg = np.mean([hourly_weights[i] for i in segment])
        lambda_segment = daily_avg * (hours_in_segment / remaining_hours) * (segment_weight_avg / (total_weight / 24))
        lambda_remaining += lambda_segment

    mu_low = (daily_avg - daily_avg_std) * (remaining_hours / 24)
    mu_high = (daily_avg + daily_avg_std) * (remaining_hours / 24)
    # BUG FIX: mu_low is negative whenever std > avg, which made the variance
    # negative and np.sqrt return NaN (and a zero sigma divided by zero).
    # Floor both variances at a small positive epsilon.
    sigma_low = np.sqrt(max(mu_low * variance_factor, 1e-6))
    sigma_high = np.sqrt(max(mu_high * variance_factor, 1e-6))

    b = prob_end - tweet_count
    if b < 0:
        # The count already exceeds the top of the range: probability 0.
        return "0.0000 - 0.0000"
    # BUG FIX: remaining tweets cannot be negative, so when the range has
    # already been entered clamp the lower integration bound to 0 instead of
    # returning a hard-coded "1.0000 - 1.0000".
    a = max(prob_start - tweet_count, 0.0)

    prob_low = norm.cdf((b - mu_low) / sigma_low) - norm.cdf((a - mu_low) / sigma_low)
    prob_high = norm.cdf((b - mu_high) / sigma_high) - norm.cdf((a - mu_high) / sigma_high)
    return f"{prob_low:.4f} - {prob_high:.4f}"


# ---- pkg/dash/func/info_m.py (new file) ------------------------------------
from pkg.dash.func.info_func import *
from pkg.dash.app_init import app
from dash.dependencies import Input, Output
from dash import html


@app.callback(
    [Output('manual-info-tooltip', 'children')],
    [Input('update-button', 'n_clicks'),
     Input('prob-start-input', 'value'),
     Input('prob-end-input', 'value')]
)
def update_info_manual(n_clicks, prob_start, prob_end):
    """Render the probability row for the user-selected [prob_start, prob_end]
    tweet range once the Calculate button has been clicked."""
    if n_clicks == 0:
        return [html.Div("Click 'Manual Update' to see results.")]

    tweet_count, days_to_next_friday = get_pace_params()
    prob_start = int(prob_start) if prob_start is not None else 525
    prob_end = int(prob_end) if prob_end is not None else 549

    probability = calculate_tweet_probability(tweet_count, days_to_next_friday, prob_start, prob_end)

    # calculate_tweet_probability always returns the "low - high" string form.
    prob_low, prob_high = map(float, probability.split(" - "))
    formatted_probability = f"{prob_low * 100:.2f}% - {prob_high * 100:.2f}%"

    pace_table_rows = [
        html.Tr([
            html.Th(f"Probability ({prob_start}-{prob_end})", colSpan=2, style={'paddingRight': '10px'}),
            html.Td(formatted_probability, colSpan=6, style={'paddingRight': '10px'})
        ])
    ]
    pace_table = html.Table(pace_table_rows, style={
        'width': '100%',
        'textAlign': 'left',
        'borderCollapse': 'collapse'
    })
    return [pace_table]