2025-03-06 13:59:21 +08:00
|
|
|
from datetime import datetime, timedelta
|
2025-03-05 10:24:46 +08:00
|
|
|
import pandas as pd
|
2025-03-06 13:59:21 +08:00
|
|
|
from pkg.config import render_data
|
|
|
|
import pytz
|
2025-03-05 10:24:46 +08:00
|
|
|
|
|
|
|
def aggregate_data(data, interval):
    """Aggregate per-minute tweet rows into fixed-width interval buckets per day.

    Parameters
    ----------
    data : pd.DataFrame
        Expected columns: 'date' and 'minute_of_day' (0-1439).
        May be empty or missing 'date'.
    interval : int
        Bucket width in minutes (e.g. 5, 10, 30, 60).

    Returns
    -------
    pd.DataFrame
        Columns ['interval_group', 'tweet_count', 'date']; one row per
        interval per day, zero-filled for intervals with no tweets.
    """
    # Template of every interval start in a day (0, interval, ..., < 1440).
    all_minutes = pd.DataFrame({'interval_group': range(0, 1440, interval)})

    result = []

    # No usable input: return a single zero-filled day stamped with today.
    if data.empty or 'date' not in data.columns:
        complete_data = all_minutes.copy()
        complete_data['tweet_count'] = 0
        complete_data['date'] = datetime.now().date()
        return complete_data

    for date in data['date'].unique():
        day_data = data[data['date'] == date].copy()
        # Snap each minute down to the start of its interval bucket.
        day_data['interval_group'] = (day_data['minute_of_day'] // interval) * interval
        agg = day_data.groupby('interval_group').size().reset_index(name='tweet_count')
        # Left-merge against the full template so empty buckets appear as 0.
        complete_data = all_minutes.merge(agg, on='interval_group', how='left').fillna({'tweet_count': 0})
        # BUGFIX: fillna leaves 'tweet_count' as float in this path while the
        # empty-input path yields int; cast so callers see one consistent dtype.
        complete_data['tweet_count'] = complete_data['tweet_count'].astype(int)
        complete_data['date'] = date
        return_frame = complete_data
        result.append(return_frame)

    # Defensive fallback (unreachable when the loop above appended at least
    # one frame, which it always does for non-empty data with a 'date' column).
    if not result:
        complete_data = all_minutes.copy()
        complete_data['tweet_count'] = 0
        complete_data['date'] = data['date'].iloc[0] if not data.empty else datetime.now().date()
        return complete_data

    return pd.concat(result, ignore_index=True)
|
|
|
|
|
|
|
|
|
|
|
|
def generate_xticks(interval):
    """Choose x-axis tick positions and labels for a given aggregation interval.

    Parameters
    ----------
    interval : int
        Aggregation bucket width in minutes.

    Returns
    -------
    (list[int], list[str])
        Tick positions in minutes since midnight and matching "HH:MM" labels.
    """
    # Coarser data intervals get sparser ticks. NOTE: the original had two
    # identical branches (<=5 and <=10 both chose 60); collapsed into one.
    if interval <= 10:
        tick_step = 60    # hourly ticks
    elif interval <= 30:
        tick_step = 120   # every 2 hours
    else:
        tick_step = 240   # every 4 hours

    ticks = list(range(0, 1440, tick_step))
    tick_labels = [f"{m // 60:02d}:{m % 60:02d}" for m in ticks]
    return ticks, tick_labels
|
|
|
|
|
|
|
|
|
|
|
|
def minutes_to_time(minutes):
    """Format minutes-since-midnight as an "HH:MM" string."""
    hours, mins = divmod(minutes, 60)
    return f"{hours:02d}:{mins:02d}"
|
2025-03-06 13:59:21 +08:00
|
|
|
|
|
|
|
|
|
|
|
def get_tweets_since_last_friday():
    """Count tweets posted since the most recent Friday 12:00 (noon) US/Eastern.

    The window opens at the latest Friday-noon boundary that has already
    passed; before this week's boundary it falls back to last week's.

    Returns
    -------
    int
        Number of rows in render_data.global_df with 'datetime_est' at or
        after the window start; 0 when no data is loaded.
    """
    est = pytz.timezone('US/Eastern')
    now_est = datetime.now(est)
    today = now_est.date()

    # Friday is weekday 4; yields 0 when today *is* Friday.
    days_since_friday = (today.weekday() - 4) % 7
    this_friday = today - timedelta(days=days_since_friday)

    noon = datetime.strptime("12:00", "%H:%M").time()
    this_friday_datetime = est.localize(datetime.combine(this_friday, noon))

    last_friday = this_friday - timedelta(days=7)
    last_friday_datetime = est.localize(datetime.combine(last_friday, noon))

    # If this week's Friday-noon boundary hasn't occurred yet, start from
    # the previous week's boundary.
    if now_est < this_friday_datetime:
        start_datetime = last_friday_datetime
    else:
        start_datetime = this_friday_datetime

    # BUGFIX: also guard against global_df being None (as the sibling
    # get_time_since_last_tweet does); hasattr alone would let None.empty
    # raise AttributeError.
    if (hasattr(render_data, 'global_df')
            and render_data.global_df is not None
            and not render_data.global_df.empty):
        df = render_data.global_df.copy()
        mask = df['datetime_est'] >= start_datetime
        filtered_df = df[mask]
        tweet_count = len(filtered_df)
        return int(tweet_count)

    return 0
|
|
|
|
|
2025-03-07 14:14:08 +08:00
|
|
|
|
|
|
|
def get_time_since_last_tweet():
    """Return the time elapsed, in days (float), since the newest tweet.

    Reads render_data.global_df['datetime_est']; returns 0.0 when no data
    is loaded, the column is missing, or it contains no valid timestamps.
    """
    est = pytz.timezone('US/Eastern')
    now_est = datetime.now(est)

    # No data loaded yet.
    if (not hasattr(render_data, 'global_df') or
            render_data.global_df is None or
            render_data.global_df.empty):
        return 0.0

    df = render_data.global_df

    if 'datetime_est' not in df.columns:
        return 0.0

    latest_tweet_time = df['datetime_est'].max()
    # BUGFIX: max() returns NaT for an all-NaT column; the subtraction below
    # would then hand callers NaN instead of the documented 0.0.
    if pd.isna(latest_tweet_time):
        return 0.0

    time_diff = now_est - latest_tweet_time
    # Convert seconds to days.
    days_diff = time_diff.total_seconds() / (24 * 60 * 60)

    return days_diff
|
|
|
|
|
2025-03-07 13:52:35 +08:00
|
|
|
def format_time_str(days_to_next_friday):
    """Render a fractional-day duration as "Dd HHh MMm SSs (Hh)".

    The parenthesised value is the same duration expressed in hours,
    rounded to two decimal places.
    """
    day_s = 24 * 60 * 60
    hour_s = 60 * 60

    total_seconds = days_to_next_friday * day_s
    days = int(total_seconds // day_s)
    leftover = total_seconds % day_s
    hours = int(leftover // hour_s)
    minutes = int((leftover % hour_s) // 60)
    seconds = int(total_seconds % 60)

    total_hours = round(days_to_next_friday * 24, 2)

    return f"{days}d {hours:02d}h {minutes:02d}m {seconds:02d}s ({total_hours}h)"
|
|
|
|
|
|
|
|
|
|
|
|
def get_hourly_weighted_array():
    """Build 24 per-hour tweet-probability weights from the last 7 days.

    Filters render_data.global_agg_df to the last 7 calendar days
    (US/Eastern), re-buckets to hourly via aggregate_data, and normalizes
    the per-hour counts into weights summing to 1.

    Returns
    -------
    list[float]
        24 weights indexed by hour of day; uniform 1/24 fallback when no
        data is available.
    """
    est = pytz.timezone('US/Eastern')
    now = datetime.now(est).date()
    last_7_days = [now - timedelta(days=i) for i in range(7)]

    # BUGFIX: guard a missing/None global_agg_df (every sibling accessor
    # guards global_df this way); the unguarded access raised
    # AttributeError/TypeError before any data was loaded.
    if (not hasattr(render_data, 'global_agg_df')
            or render_data.global_agg_df is None):
        return [1 / 24] * 24

    multi_data_agg = render_data.global_agg_df[
        render_data.global_agg_df['date'].isin(last_7_days)].copy()

    if multi_data_agg.empty:
        return [1 / 24] * 24

    # Re-bucket to hourly, then sum across the 7 days into one 24-hour profile.
    agg_data = aggregate_data(multi_data_agg, 60)
    one_day_data = agg_data.groupby('interval_group')['tweet_count'].sum().reset_index()
    tweet_count_total = one_day_data['tweet_count'].sum()

    hourly_rates = [0] * 24
    for _, row in one_day_data.iterrows():
        minute = row['interval_group']
        hour = int(minute // 60)
        if hour < 24:
            hourly_rates[hour] = row['tweet_count'] / tweet_count_total if tweet_count_total > 0 else 0

    # Renormalize so the weights sum to exactly 1 (guards float drift and
    # the all-zero case).
    total_rate = sum(hourly_rates)
    if total_rate > 0:
        hourly_rates = [rate / total_rate for rate in hourly_rates]
    else:
        hourly_rates = [1 / 24] * 24

    return hourly_rates
|