From 192bd42d0eec846f72f266709217f2b3bd4762e2 Mon Sep 17 00:00:00 2001 From: NY Date: Fri, 14 Mar 2025 15:27:11 +0800 Subject: [PATCH] +probability(unfinished) --- pkg/dash/app_html.py | 42 ++++++++++++++- pkg/dash/func/info_test.py | 106 +++++++++++++++++++++++++++++++++++++ 2 files changed, 146 insertions(+), 2 deletions(-) create mode 100644 pkg/dash/func/info_test.py diff --git a/pkg/dash/app_html.py b/pkg/dash/app_html.py index 04333be..91565ee 100644 --- a/pkg/dash/app_html.py +++ b/pkg/dash/app_html.py @@ -1,7 +1,7 @@ +from datetime import timedelta, datetime from dash import dcc, html from pkg.config import interval_options, days_options, render_data - def layout_config(app): app.layout = html.Div([ html.Div( @@ -229,9 +229,47 @@ def layout_config(app): 'width': '50%', 'marginTop': '10px', 'borderCollapse': 'collapse' + }), + # 新增测试区域 + html.H2("Historical Probability Test", style={'marginTop': '20px'}), + html.Table([ + html.Tr([ + html.Td("Test Date:", style={'paddingRight': '10px'}), + html.Td( + dcc.DatePickerSingle( + id='test-date-input', + date=(datetime.now().date() - timedelta(days=1)).strftime('%Y-%m-%d'), # 默认昨天 + display_format='YYYY-MM-DD', + style={'width': '100%'} + ) + ) + ]), + html.Tr([ + html.Td("Test Time:", style={'paddingRight': '10px'}), + html.Td( + dcc.Input( + id='test-time-input', + type='text', + placeholder='HH:MM:SS (e.g., 12:00:00)', + value='12:00:00', + style={'width': '100%'} + ) + ) + ]), + html.Tr([ + html.Td("Test Probability:", style={'paddingRight': '10px'}), + html.Td( + html.Button('Test', id='test-button', n_clicks=0) + ) + ]), + html.Tr(id='test-info-tooltip', style={'margin': '10px'}) + ], style={ + 'width': '50%', + 'marginTop': '10px', + 'borderCollapse': 'collapse' }) ], style={'marginLeft': '50px'}), dcc.Interval(id='clock-interval', interval=1000, n_intervals=0) ]) - return app + return app \ No newline at end of file diff --git a/pkg/dash/func/info_test.py b/pkg/dash/func/info_test.py new file mode 100644 
# File: pkg/dash/func/info_test.py (new file added by this patch)
from pkg.dash.func.info_func import *
from pkg.dash.app_init import app
from dash.dependencies import Input, Output
from dash import html
import pandas as pd
from datetime import timedelta

# Padding shared by every header/value cell of the result table.
_CELL_STYLE = {'paddingRight': '10px'}

_SECONDS_PER_DAY = 24 * 60 * 60


def _message(text):
    """Wrap a status message in the one-element list the callback returns."""
    return [html.Div(text)]


def _result_row(label, value, value_style=None):
    """Build one ``<tr>`` with a 2-column header cell and a 6-column value cell."""
    td_style = dict(_CELL_STYLE)
    if value_style:
        td_style.update(value_style)
    return html.Tr([
        html.Th(label, colSpan=2, style=dict(_CELL_STYLE)),
        html.Td(value, colSpan=6, style=td_style),
    ])


def _parse_test_datetime(test_date, test_time, tz):
    """Parse the date/time inputs into ``(date, tz-aware Timestamp)``.

    Returns ``None`` when either input is missing, cannot be parsed, or cannot
    be localized to *tz*.
    """
    if test_date is None or test_time is None:
        return None
    try:
        parsed_day = pd.to_datetime(test_date)
        if pd.isna(parsed_day):
            return None
        day = parsed_day.date()
        moment = pd.to_datetime(f"{day} {test_time}")
        if pd.isna(moment):
            return None
        moment = moment.tz_localize(tz)
    except (ValueError, TypeError, pytz.InvalidTimeError):
        # ValueError: unparsable text.
        # TypeError: the time string carried its own UTC offset, so the
        #   timestamp is already tz-aware and cannot be localized again.
        # pytz.InvalidTimeError: wall time that is skipped or repeated by a
        #   DST transition (NonExistentTimeError / AmbiguousTimeError).
        return None
    return day, moment


def _days_until_friday_noon(moment):
    """Return the fractional number of days from *moment* to the next Friday 12:00.

    If *moment* is a Friday later than 12:00, the following week's Friday is
    used.
    """
    offset_days = (4 - moment.weekday()) % 7  # Monday == 0, Friday == 4
    target = (moment.replace(hour=12, minute=0, second=0, microsecond=0)
              + timedelta(days=offset_days))
    if moment > target:
        target += timedelta(days=7)
    # NOTE(review): adding a timedelta to a tz-aware Timestamp is believed to be
    # absolute-time arithmetic, so the target may drift by an hour when the
    # span crosses a DST change - confirm whether that matters here.
    return (target - moment).total_seconds() / _SECONDS_PER_DAY


@app.callback(
    [Output('test-info-tooltip', 'children')],
    [Input('test-button', 'n_clicks'),
     Input('test-date-input', 'date'),
     Input('test-time-input', 'value')]
)
def update_test_info(n_clicks, test_date, test_time):
    """Render the "Historical Probability Test" result table.

    The callback replays ``calculate_tweet_probability`` as of a past moment
    (``test_date`` + ``test_time``, interpreted as US/Eastern) and shows the
    result next to the tweet count actually recorded for that date.

    Because the date and time are declared as ``Input`` (not ``State``), the
    callback also re-runs whenever either of them changes once the button has
    been clicked at least once.

    Args:
        n_clicks: Click count of the 'Test' button; nothing is computed until
            it is non-zero.
        test_date: Date string from the date picker.
        test_time: Time string typed by the user (``HH:MM:SS``).

    Returns:
        A one-element list (matching the list-style ``Output`` declaration)
        holding either an ``html.Div`` status message or the result
        ``html.Table``.
    """
    if not n_clicks:
        return _message("Click 'Test' to see historical probability results.")

    est = pytz.timezone('US/Eastern')

    # Parse the requested test date and time.
    parsed = _parse_test_datetime(test_date, test_time, est)
    if parsed is None:
        return _message("Invalid date or time format. Use YYYY-MM-DD and HH:MM:SS (e.g., 12:00:00).")
    test_date, test_datetime = parsed

    # 1. Cumulative tweet count up to test_datetime (simulates the tweet_count
    #    that would have been known at that moment).
    source = render_data.global_agg_df
    if source is None:
        # Data has not been loaded yet; report it the same way as "no rows".
        return _message(f"No data available up to {test_datetime}")
    data = source.copy()
    historical_data = data[data['datetime_est'] <= test_datetime]
    if historical_data.empty:
        return _message(f"No data available up to {test_datetime}")
    tweet_count = historical_data['tweet_count'].sum()

    # 2. Actual final tweet count (total up to the end of the test day).
    day_end = pd.to_datetime(f"{test_date} 23:59:59").tz_localize(est)
    actual_data = data[(data['date'] == test_date) & (data['datetime_est'] <= day_end)]
    if actual_data.empty:
        return _message(f"No data available for {test_date}")
    actual_end_count = actual_data['tweet_count'].sum()

    # 3. Simulate days_to_next_friday (from test_datetime to next Friday noon).
    days_to_next_friday = _days_until_friday_noon(test_datetime)

    # 4. Prediction range: +/-10% around the actual final tweet count.
    prob_start = actual_end_count * 0.9  # 90% of actual
    prob_end = actual_end_count * 1.1    # 110% of actual

    # 5. Call the original calculate_tweet_probability() to get the probability.
    #    The result is parsed as a "<min> - <max>" string of fractions.
    probability = calculate_tweet_probability(tweet_count, days_to_next_friday, prob_start, prob_end)
    prob_min, prob_max = map(float, probability.split(" - "))
    formatted_probability = f"{prob_min * 100:.2f}% - {prob_max * 100:.2f}%"

    # NOTE(review): the range is derived from actual_end_count itself, so this
    # check holds for every non-negative count and the "No" branch below is
    # effectively unreachable. Kept as-is because the intended comparison is
    # not clear from this (unfinished) patch.
    in_range = prob_start <= actual_end_count <= prob_end

    # 6. Build the result table.
    test_table_rows = [
        _result_row("Test Date and Time:", str(test_datetime)),
        _result_row("Tweet Count at Test Time:", str(tweet_count)),
        _result_row("Actual Final Tweet Count:", str(actual_end_count)),
        _result_row(f"Predicted Range ({int(prob_start)}-{int(prob_end)}):", formatted_probability),
        _result_row(
            "Does Actual Fall in Range?",
            "Yes" if in_range else "No",
            value_style={'color': 'green' if in_range else 'red'},
        ),
    ]
    if in_range:
        expected_prob = (prob_max + prob_min) / 2
        test_table_rows.append(
            _result_row(
                "Expected Probability:",
                f"~{expected_prob * 100:.2f}% (should be high if model fits)",
            )
        )
    else:
        test_table_rows.append(
            _result_row(
                "Note:",
                "Model prediction does not match actual outcome.",
                value_style={'color': 'red'},
            )
        )

    test_table = html.Table(test_table_rows, style={
        'width': '100%',
        'textAlign': 'left',
        'borderCollapse': 'collapse'
    })
    return [test_table]