def _localized_midnight(tz, day):
    """Return 00:00 on *day* as an aware datetime in the pytz zone *tz*."""
    # pytz zones must be attached with localize(); datetime.replace() or
    # timedelta arithmetic would keep a stale UTC offset across a DST change.
    return tz.localize(datetime(day.year, day.month, day.day))


def get_pace_and_total_tweets(target_time: datetime) -> tuple[float, int]:
    """Return the projected weekly tweet total ("pace") and the count so far.

    The tracking week runs from Friday 00:00 US/Eastern up to the following
    Friday 00:00 US/Eastern. Tweets are read from
    ``render_data.global_agg_df``, which is accessed through its ``date``,
    ``minute_of_day`` and (optionally) ``tweet_count`` columns.

    Args:
        target_time: Moment to evaluate. A naive value is interpreted as
            US/Eastern wall-clock time; an aware value is converted to
            US/Eastern so week boundaries always follow the Eastern calendar.

    Returns:
        A ``(pace, total_tweets)`` tuple. ``total_tweets`` is the sum of
        ``tweet_count`` from the start of the week up to ``target_time``.
        ``pace`` extrapolates the average daily rate so far over the time
        remaining in the week, rounded to two decimals; it equals
        ``total_tweets`` when no time has elapsed or nothing was counted.
    """
    est = pytz.timezone('US/Eastern')

    if target_time.tzinfo is None:
        target_time = est.localize(target_time)
    else:
        target_time = target_time.astimezone(est)

    # Friday is weekday 4; on a Friday the current week started today.
    target_date = target_time.date()
    week_start_date = target_date - timedelta(days=(target_date.weekday() - 4) % 7)
    week_start = _localized_midnight(est, week_start_date)
    week_end = _localized_midnight(est, week_start_date + timedelta(days=7))

    agg_df = getattr(render_data, 'global_agg_df', None)
    if agg_df is None or agg_df.empty:
        total_tweets = 0
    else:
        in_week = agg_df[
            (agg_df['date'] >= week_start_date) & (agg_df['date'] <= target_date)
        ]
        if in_week.empty:
            total_tweets = 0
        else:
            # The rows only carry Eastern wall-clock information (date plus
            # minute of day), so compare against the wall-clock form of
            # target_time. Localizing each row instead would raise on the
            # ambiguous/nonexistent hours around DST transitions.
            wall_clock = (
                pd.to_datetime(in_week['date'].astype(str))
                + pd.to_timedelta(in_week['minute_of_day'], unit='m')
            )
            up_to_target = in_week[wall_clock <= target_time.replace(tzinfo=None)]
            if 'tweet_count' in up_to_target:
                total_tweets = up_to_target['tweet_count'].sum()
            else:
                total_tweets = 0

    # Aware-datetime subtraction yields real elapsed time, so a week that
    # contains a DST change is correctly 167 or 169 hours long.
    one_day = timedelta(days=1)
    days_elapsed = (target_time - week_start) / one_day
    days_remaining = (week_end - target_time) / one_day

    if days_elapsed > 0 and total_tweets > 0:
        daily_avg = total_tweets / days_elapsed
        pace = daily_avg * days_remaining + total_tweets
    else:
        # No data yet, or the week has only just begun: nothing to extrapolate.
        pace = float(total_tweets)

    return round(pace, 2), int(total_tweets)