+probability(debug&test)

2025-03-14 15:55:44 +08:00 · 2025-03-14 15:55:44 +08:00 · 8a2614b073
commit 8a2614b073
parent 192bd42d0e
2 changed files with 89 additions and 74 deletions
--- a/pkg/dash/app_html.py
+++ b/pkg/dash/app_html.py
@ -247,13 +247,20 @@ def layout_config(app):
                html.Tr([
                    html.Td("Test Time:", style={'paddingRight': '10px'}),
                    html.Td(
-                        dcc.Input(
-                            id='test-time-input',
-                            type='text',
-                            placeholder='HH:MM:SS (e.g., 12:00:00)',
-                            value='12:00:00',
-                            style={'width': '100%'}
-                        )
+                        html.Div([
+                            dcc.Input(
+                                id='test-time-input',
+                                type='text',
+                                placeholder='HH:MM:SS (e.g., 12:00:00)',  # 增强提示
+                                value='12:00:00',
+                                pattern='[0-2][0-9]:[0-5][0-9]:[0-5][0-9]',  # 限制格式
+                                style={'width': '100%'}
+                            ),
+                            html.Span(
+                                "Enter time in HH:MM:SS format (e.g., 12:00:00)",
+                                style={'fontSize': '12px', 'color': 'gray', 'marginTop': '5px', 'display': 'block'}
+                            )
+                        ])
                    )
                ]),
                html.Tr([
@ -267,7 +274,7 @@ def layout_config(app):
                'width': '50%',
                'marginTop': '10px',
                'borderCollapse': 'collapse'
-            })
+            }),
        ], style={'marginLeft': '50px'}),

        dcc.Interval(id='clock-interval', interval=1000, n_intervals=0)
--- a/pkg/dash/func/info_test.py
+++ b/pkg/dash/func/info_test.py
@ -3,6 +3,7 @@ from pkg.dash.app_init import app
 from dash.dependencies import Input, Output
 from dash import html
 import pandas as pd
+import re
 from datetime import timedelta

@app.callback(
@ -16,91 +17,98 @@ def update_test_info(n_clicks, test_date, test_time):
        return [html.Div("Click 'Test' to see historical probability results.")]

    est = pytz.timezone('US/Eastern')
+    data = render_data.global_agg_df.copy()
+
+    # 调试：打印输入值
+    print(f"test_date: {test_date}, test_time: {test_time}")
+
+    # 检查输入是否为空
+    if not test_date or not test_time:
+        return [html.Div("Date or time input is empty. Please provide both date (YYYY-MM-DD) and time (HH:MM:SS).")]
+
+    # 验证时间格式
+    time_pattern = r'^(?:[01]\d|2[0-3]):[0-5]\d:[0-5]\d$'  # HH:MM:SS (00:00:00 to 23:59:59)
+    if not re.match(time_pattern, test_time):
+        return [html.Div("Invalid time format. Use HH:MM:SS (e.g., 12:00:00) with hours 00-23, minutes 00-59, seconds 00-59.")]
+
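+    # Rebuild datetime_est from date + minute_of_day; DST-ambiguous wall times become NaT (ambiguous='NaT'). NOTE(review): nonexistent spring-forward times are not handled explicitly and fall back to tz_localize's default behavior - confirm.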
+    data['hours'] = data['minute_of_day'] // 60
+    data['minutes'] = data['minute_of_day'] % 60
+    data['datetime_est'] = pd.to_datetime(
+        data['date'].astype(str) + ' ' +
+        data['hours'].astype(str) + ':' +
+        data['minutes'].astype(str) + ':00',
+        errors='coerce'
+    ).dt.tz_localize(est, ambiguous='NaT')
+
+    if data['datetime_est'].isna().any():
+        print("Warning: Some datetime_est values are NaT due to ambiguous time handling")

    # 解析测试日期和时间
    try:
-        test_date = pd.to_datetime(test_date).date()
-        test_datetime = pd.to_datetime(f"{test_date} {test_time}").tz_localize(est)  # 使用 est
-    except ValueError:
+        test_date = pd.to_datetime(test_date, format='%Y-%m-%d').date()
+        test_datetime = pd.to_datetime(f"{test_date} {test_time}", format='%Y-%m-%d %H:%M:%S').tz_localize(est, ambiguous=True)
+    except ValueError as e:
+        print(f"Error parsing date/time: {e}")
        return [html.Div("Invalid date or time format. Use YYYY-MM-DD and HH:MM:SS (e.g., 12:00:00).")]

-    # 1. 计算到 test_datetime 的累计推文数（模拟当时的 tweet_count）
-    data = render_data.global_agg_df.copy()
-    historical_data = data[data['datetime_est'] <= test_datetime]
-    if historical_data.empty:
-        return [html.Div(f"No data available up to {test_datetime}")]
-    tweet_count = historical_data['tweet_count'].sum()
+    # 计算周期开始时间（上一个周五 12:00 PM）
+    test_date_only = test_datetime.replace(hour=0, minute=0, second=0, microsecond=0)  # 只考虑日期部分
+    days_to_last_friday = (test_date_only.weekday() - 4) % 7  # 4 表示周五
+    cycle_start = test_date_only - timedelta(days=days_to_last_friday)
+    cycle_start = cycle_start.replace(hour=12, minute=0, second=0, microsecond=0)  # 已经是 tz-aware，直接调整时间

-    # 2. 计算实际最终推文数（到当天结束时的总数）
-    day_end = pd.to_datetime(f"{test_date} 23:59:59").tz_localize(est)  # 使用 est
-    actual_data = data[(data['date'] == test_date) & (data['datetime_est'] <= day_end)]
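+    # Cycle end = cycle_start + 7 days (intended: next Friday 12:00 PM Eastern). NOTE(review): adding a timedelta to a tz-aware pandas Timestamp advances absolute time (168 h), so across a DST transition the wall-clock end is not 12:00; both tz_convert(est) branches below do the same thing and leave the value unchanged.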
+    cycle_end = cycle_start + timedelta(days=7)
+    if cycle_end.month == 3 and 8 <= cycle_end.day <= 14:  # 粗略检查夏令时开始（3月第二个星期日）
+        cycle_end = cycle_end.tz_convert(est)  # 转换为 EDT
+    else:
+        cycle_end = cycle_end.tz_convert(est)  # 保持一致
+
+    # 调试：打印周期信息
+    print(f"Cycle Start: {cycle_start}, Cycle End: {cycle_end}")
+
+    # 过滤周期内的数据
+    cycle_data = data[(data['datetime_est'] >= cycle_start) & (data['datetime_est'] <= test_datetime)]
+    if cycle_data.empty:
+        return [html.Div(f"No data available in cycle from {cycle_start} to {test_datetime}")]
+    tweet_count = cycle_data['tweet_count'].sum()
+
+    # 计算实际最终推文数（周期结束时的总数）
+    actual_data = data[(data['datetime_est'] >= cycle_start) & (data['datetime_est'] <= cycle_end)]
    if actual_data.empty:
-        return [html.Div(f"No data available for {test_date}")]
+        return [html.Div(f"No data available for cycle ending {cycle_end}")]
    actual_end_count = actual_data['tweet_count'].sum()

-    # 3. 模拟 days_to_next_friday（从 test_datetime 到下周五）
-    days_to_next_friday = (4 - test_date.weekday()) % 7
-    next_friday = (test_datetime.replace(hour=12, minute=0, second=0, microsecond=0) +
-                   timedelta(days=days_to_next_friday))
-    if test_datetime > next_friday:
-        next_friday += timedelta(days=7)
-    days_to_next_friday = (next_friday - test_datetime).total_seconds() / (24 * 60 * 60)
+    # 计算 days_to_next_friday（从 test_datetime 到周期结束）
+    days_to_next_friday = (cycle_end - test_datetime).total_seconds() / (24 * 60 * 60)

-    # 4. 设置预测范围（基于实际最终推文数的 ±10%）
-    prob_start = actual_end_count * 0.9  # 90% of actual
-    prob_end = actual_end_count * 1.1    # 110% of actual
+    # 设置预测范围
+    prob_start = actual_end_count * 0.9
+    prob_end = actual_end_count * 1.1

-    # 5. 调用原始的 calculate_tweet_probability() 计算概率
+    # 计算概率
    probability = calculate_tweet_probability(tweet_count, days_to_next_friday, prob_start, prob_end)
    prob_min, prob_max = map(float, probability.split(" - "))
    formatted_probability = f"{prob_min * 100:.2f}% - {prob_max * 100:.2f}%"

-    # 6. 构建测试结果表格
+    # 构建测试结果表格
    test_table_rows = [
-        html.Tr([
-            html.Th("Test Date and Time:", colSpan=2, style={'paddingRight': '10px'}),
-            html.Td(str(test_datetime), colSpan=6, style={'paddingRight': '10px'})
-        ]),
-        html.Tr([
-            html.Th("Tweet Count at Test Time:", colSpan=2, style={'paddingRight': '10px'}),
-            html.Td(str(tweet_count), colSpan=6, style={'paddingRight': '10px'})
-        ]),
-        html.Tr([
-            html.Th("Actual Final Tweet Count:", colSpan=2, style={'paddingRight': '10px'}),
-            html.Td(str(actual_end_count), colSpan=6, style={'paddingRight': '10px'})
-        ]),
-        html.Tr([
-            html.Th(f"Predicted Range ({int(prob_start)}-{int(prob_end)}):", colSpan=2, style={'paddingRight': '10px'}),
-            html.Td(formatted_probability, colSpan=6, style={'paddingRight': '10px'})
-        ]),
-        html.Tr([
-            html.Th("Does Actual Fall in Range?", colSpan=2, style={'paddingRight': '10px'}),
-            html.Td(
-                "Yes" if prob_start <= actual_end_count <= prob_end else "No",
-                colSpan=6,
-                style={'paddingRight': '10px', 'color': 'green' if prob_start <= actual_end_count <= prob_end else 'red'}
-            )
-        ])
+        html.Tr([html.Th("Test Date and Time:", colSpan=2), html.Td(str(test_datetime), colSpan=6)]),
+        html.Tr([html.Th("Tweet Count at Test Time:", colSpan=2), html.Td(str(tweet_count), colSpan=6)]),
+        html.Tr([html.Th("Actual Final Tweet Count:", colSpan=2), html.Td(str(actual_end_count), colSpan=6)]),
+        html.Tr([html.Th(f"Predicted Range ({int(prob_start)}-{int(prob_end)}):", colSpan=2), html.Td(formatted_probability, colSpan=6)]),
+        html.Tr([html.Th("Does Actual Fall in Range?", colSpan=2),
+                 html.Td("Yes" if prob_start <= actual_end_count <= prob_end else "No",
+                         colSpan=6, style={'color': 'green' if prob_start <= actual_end_count <= prob_end else 'red'})])
    ]
    if prob_start <= actual_end_count <= prob_end:
        expected_prob = (prob_max + prob_min) / 2
-        test_table_rows.append(
-            html.Tr([
-                html.Th("Expected Probability:", colSpan=2, style={'paddingRight': '10px'}),
-                html.Td(f"~{expected_prob * 100:.2f}% (should be high if model fits)", colSpan=6, style={'paddingRight': '10px'})
-            ])
-        )
+        test_table_rows.append(html.Tr([html.Th("Expected Probability:", colSpan=2),
+                                        html.Td(f"~{expected_prob * 100:.2f}% (should be high if model fits)", colSpan=6)]))
    else:
-        test_table_rows.append(
-            html.Tr([
-                html.Th("Note:", colSpan=2, style={'paddingRight': '10px'}),
-                html.Td("Model prediction does not match actual outcome.", colSpan=6, style={'paddingRight': '10px', 'color': 'red'})
-            ])
-        )
+        test_table_rows.append(html.Tr([html.Th("Note:", colSpan=2),
+                                        html.Td("Model prediction does not match actual outcome.", colSpan=6, style={'color': 'red'})]))

-    test_table = html.Table(test_table_rows, style={
-        'width': '100%',
-        'textAlign': 'left',
-        'borderCollapse': 'collapse'
-    })
+    test_table = html.Table(test_table_rows, style={'width': '100%', 'textAlign': 'left', 'borderCollapse': 'collapse'})
    return [test_table]