from datetime import datetime, time, timedelta

import pandas as pd
import pytz

from pkg.config import render_data


def aggregate_data(data, interval):
    """Bucket tweets into fixed minute-of-day intervals, producing one full set of rows per date."""
    all_minutes = pd.DataFrame({'interval_group': range(0, 1440, interval)})
    result = []

    if data.empty or 'date' not in data.columns:
        complete_data = all_minutes.copy()
        complete_data['tweet_count'] = 0
        complete_data['date'] = datetime.now().date()
        return complete_data

    for date in data['date'].unique():
        day_data = data[data['date'] == date].copy()
        day_data['interval_group'] = (day_data['minute_of_day'] // interval) * interval
        agg = day_data.groupby('interval_group').size().reset_index(name='tweet_count')
        complete_data = all_minutes.merge(agg, on='interval_group', how='left').fillna({'tweet_count': 0})
        complete_data['date'] = date
        result.append(complete_data)

    if not result:
        # Defensive fallback: return an all-zero day if no per-date frames were built.
        complete_data = all_minutes.copy()
        complete_data['tweet_count'] = 0
        complete_data['date'] = data['date'].iloc[0] if not data.empty else datetime.now().date()
        return complete_data

    return pd.concat(result, ignore_index=True)


def generate_xticks(interval):
    """Return x-axis tick positions (minutes of day) and HH:MM labels for the given interval."""
    if interval <= 10:
        tick_step = 60
    elif interval <= 30:
        tick_step = 120
    else:
        tick_step = 240
    ticks = list(range(0, 1440, tick_step))
    tick_labels = [f"{m // 60:02d}:{m % 60:02d}" for m in ticks]
    return ticks, tick_labels


def minutes_to_time(minutes):
    """Format a minute-of-day value as HH:MM."""
    hours = minutes // 60
    mins = minutes % 60
    return f"{hours:02d}:{mins:02d}"


def get_tweets_since_last_friday():
    """Count tweets since the most recent Friday 12:00 US/Eastern."""
    est = pytz.timezone('US/Eastern')
    now_est = datetime.now(est)
    today = now_est.date()

    days_since_friday = (today.weekday() - 4) % 7
    this_friday = today - timedelta(days=days_since_friday)
    this_friday_datetime = est.localize(datetime.combine(this_friday, time(12, 0)))
    last_friday = this_friday - timedelta(days=7)
    last_friday_datetime = est.localize(datetime.combine(last_friday, time(12, 0)))

    # If this week's Friday noon has not happened yet, count from last week's Friday noon.
    if now_est < this_friday_datetime:
        start_datetime = last_friday_datetime
    else:
        start_datetime = this_friday_datetime

    if hasattr(render_data, 'global_df') and not render_data.global_df.empty:
        df = render_data.global_df.copy()
        mask = df['datetime_est'] >= start_datetime
        filtered_df = df[mask]
        tweet_count = len(filtered_df)
        return int(tweet_count)
    return 0


def get_time_since_last_tweet():
    """Return days elapsed since the most recent tweet, or 0.0 if no data is available."""
    est = pytz.timezone('US/Eastern')
    now_est = datetime.now(est)

    if (not hasattr(render_data, 'global_df')
            or render_data.global_df is None
            or render_data.global_df.empty):
        return 0.0

    df = render_data.global_df
    if 'datetime_est' not in df.columns:
        return 0.0

    latest_tweet_time = df['datetime_est'].max()
    time_diff = now_est - latest_tweet_time
    days_diff = time_diff.total_seconds() / (24 * 60 * 60)  # convert to days
    return days_diff


def format_time_str(days_to_next_friday):
    """Format a duration given in days as 'Xd HHh MMm SSs (total hours)'."""
    total_seconds = days_to_next_friday * 24 * 60 * 60
    days = int(total_seconds // (24 * 60 * 60))
    hours = int((total_seconds % (24 * 60 * 60)) // (60 * 60))
    minutes = int((total_seconds % (60 * 60)) // 60)
    seconds = int(total_seconds % 60)
    total_hours = round(days_to_next_friday * 24, 2)
    return f"{days}d {hours:02d}h {minutes:02d}m {seconds:02d}s ({total_hours}h)"


def get_hourly_weighted_array():
    """Return a 24-element list of per-hour tweet-rate weights from the last 7 days (sums to 1)."""
    est = pytz.timezone('US/Eastern')
    now = datetime.now(est).date()
    last_7_days = [now - timedelta(days=i) for i in range(7)]

    multi_data_agg = render_data.global_agg_df[
        render_data.global_agg_df['date'].isin(last_7_days)].copy()
    if multi_data_agg.empty:
        return [1 / 24] * 24  # uniform fallback when there is no recent data

    agg_data = aggregate_data(multi_data_agg, 60)
    one_day_data = agg_data.groupby('interval_group')['tweet_count'].sum().reset_index()
    tweet_count_total = one_day_data['tweet_count'].sum()

    hourly_rates = [0] * 24
    for _, row in one_day_data.iterrows():
        minute = row['interval_group']
        hour = int(minute // 60)
        if hour < 24:
            hourly_rates[hour] = row['tweet_count'] / tweet_count_total if tweet_count_total > 0 else 0

    # Normalize so the weights sum to 1; fall back to a uniform distribution if all zero.
    total_rate = sum(hourly_rates)
    if total_rate > 0:
        hourly_rates = [rate / total_rate for rate in hourly_rates]
    else:
        hourly_rates = [1 / 24] * 24
    return hourly_rates
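

if __name__ == "__main__":
    # Minimal usage sketch (an illustrative addition, not part of the original module):
    # exercises aggregate_data() and generate_xticks() with a tiny hand-built DataFrame.
    # The 'date' and 'minute_of_day' column names match what aggregate_data() expects.
    sample = pd.DataFrame({
        'date': [datetime.now().date()] * 3,
        'minute_of_day': [5, 65, 70],  # 00:05, 01:05, 01:10
    })
    hourly = aggregate_data(sample, 60)
    print(hourly[hourly['tweet_count'] > 0])  # expect 1 tweet in hour 0, 2 in hour 1
    print(generate_xticks(60))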