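"""Dash dashboard charting tweet counts per time-of-day interval (US Eastern time).

Tweet timestamps are read from a MySQL table when the module is imported and
are aggregated per calendar day and minute before the app is served.
"""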
import os
from datetime import datetime
from urllib.parse import quote_plus

import dash
import pandas as pd
import plotly.graph_objs as go
import pytz
from dash import dcc, html
from dash.dependencies import Input, Output
from sqlalchemy import create_engine
# Database connection configuration.
# Every setting can be overridden with an ELONX_DB_* environment variable; the
# literals are only the fallbacks used when the variable is unset.
# SECURITY: a database password is hard-coded below. Rotate it, provide the new
# one through ELONX_DB_PASSWORD, and then delete the fallback literal.
DB_CONFIG = {
    'host': os.environ.get('ELONX_DB_HOST', '8.155.23.172'),
    'port': int(os.environ.get('ELONX_DB_PORT', 3306)),
    'user': os.environ.get('ELONX_DB_USER', 'root2'),
    'password': os.environ.get('ELONX_DB_PASSWORD', 'tG0f6PVYh18le41BCb'),
    'database': os.environ.get('ELONX_DB_NAME', 'elonX'),
}
# Table the tweet timestamps are read from.
TABLE_NAME = 'elon_tweets'
# Create database connection using SQLAlchemy.
# User and password are percent-encoded so that characters such as '@', ':' or
# '/' inside a credential cannot be misread as URI delimiters.
db_uri = (
    "mysql+pymysql://"
    f"{quote_plus(str(DB_CONFIG['user']))}:{quote_plus(str(DB_CONFIG['password']))}"
    f"@{DB_CONFIG['host']}:{DB_CONFIG['port']}/{DB_CONFIG['database']}"
)
engine = create_engine(db_uri)
# Load data: fetch the `timestamp` column of every row in TABLE_NAME.
# This runs once, at import time; rows added later are not picked up.
df = pd.read_sql(f'SELECT timestamp FROM {TABLE_NAME}', con=engine)
# Data preprocessing (EST based)
# The zones are IANA region zones, so conversions follow daylight saving time;
# "EST"/"PST"/"CST" are used throughout this file as loose labels.
eastern = pytz.timezone('America/New_York')  # EST
pacific = pytz.timezone('America/Los_Angeles')  # PST
central = pytz.timezone('America/Chicago')  # CST (Texas)

# `timestamp` is parsed as seconds since the Unix epoch (naive UTC) ...
df['datetime'] = pd.to_datetime(df['timestamp'], unit='s')
# ... then made UTC-aware and shifted to Eastern wall-clock time.
df['datetime_est'] = df['datetime'].dt.tz_localize('UTC').dt.tz_convert(eastern)
# Eastern calendar day and minute within that day (0..1439).
df['date'] = df['datetime_est'].dt.date
df['minute_of_day'] = df['datetime_est'].dt.hour * 60 + df['datetime_est'].dt.minute
# One row per (day, minute) that has at least one row in df; `tweet_count` is
# the number of df rows falling in that minute.
agg_df = df.groupby(['date', 'minute_of_day']).size().reset_index(name='tweet_count')
# Get all dates for selector, sorted from latest to earliest
all_dates = sorted(agg_df['date'].unique(), reverse=True)
# Default to only the most recent day; stay empty (instead of raising
# IndexError) when the table returned no rows.
default_date = [str(all_dates[0])] if all_dates else []
# Initialize Dash app
app = dash.Dash(__name__)
# Time interval options: bucket widths in minutes offered by the interval
# dropdown. Each value divides 1440, so buckets tile a day exactly.
interval_options = [
    {'label': f"{minutes} minute{'' if minutes == 1 else 's'}", 'value': minutes}
    for minutes in (1, 5, 10, 30, 60)
]
# Dash app layout
app.layout = html.Div([
    html.H1("Elon Musk Tweet Time Analysis (EST)"),
    # Multi-select of the days to overlay on the graph.
    dcc.Dropdown(
        id='multi-date-picker',
        options=[{'label': str(date), 'value': str(date)} for date in all_dates],
        value=default_date,
        multi=True,
        searchable=True,
        placeholder="Search and select dates (YYYY-MM-DD)",
        style={'width': '100%'}
    ),
    # Bucket width in minutes. Not clearable: the graph callback needs a
    # number and would otherwise receive None when the user clears the field.
    dcc.Dropdown(
        id='multi-interval-picker',
        options=interval_options,
        value=10,  # Default 10 minutes
        clearable=False,
        style={'width': '50%', 'marginTop': '10px'}
    ),
    # Filled by the graph callback when too many days are selected.
    html.Div(id='multi-day-warning', style={'color': 'red', 'margin': '10px'}),
    # Toggles for the vertical reference lines drawn on the graph.
    dcc.Checklist(
        id='time-zone-checklist',
        options=[
            {'label': 'California Time (PST)', 'value': 'PST'},
            {'label': 'Texas Time (CST)', 'value': 'CST'}
        ],
        value=['PST'],  # Default show PST, hide CST
        style={'margin': '10px'}
    ),
    html.Div(id='multi-tweet-summary', style={'fontSize': 20, 'margin': '10px'}),
    dcc.Graph(id='multi-tweet-graph'),
    html.Div([
        html.Div(id='est-clock', style={'fontSize': 20, 'margin': '10px'}),
        html.Div(id='pst-clock', style={'fontSize': 20, 'margin': '10px'})
    ]),
    dcc.Interval(id='clock-interval', interval=1000, n_intervals=0)  # Update clocks every second
])
# Data aggregation function
def aggregate_data(data, interval):
    """Bucket per-minute tweet rows into fixed-width intervals for each day.

    Args:
        data: DataFrame with a ``date`` column and a ``minute_of_day`` column
            (minutes since midnight). When a ``tweet_count`` column is present
            its values are summed per bucket; otherwise each row counts as one.
        interval: Bucket width in minutes.

    Returns:
        DataFrame with columns ``interval_group`` (bucket start minute),
        ``tweet_count`` and ``date``. Every day in ``data`` gets one row per
        bucket start in ``range(0, 1440, interval)``; buckets without rows get
        a count of 0. Empty input yields an empty frame with those columns.
    """
    if data.empty:
        # pd.concat([]) raises ValueError, so answer empty input explicitly.
        return pd.DataFrame(columns=['interval_group', 'tweet_count', 'date'])

    has_counts = 'tweet_count' in data.columns
    all_minutes = pd.DataFrame({'interval_group': range(0, 1440, interval)})
    result = []
    # sort=False keeps days in order of first appearance.
    for date, day_rows in data.groupby('date', sort=False):
        bucketed = day_rows.assign(
            interval_group=(day_rows['minute_of_day'] // interval) * interval
        ).groupby('interval_group')
        # Rows that already carry per-minute counts must be summed: counting
        # rows would treat several tweets in one minute as a single tweet.
        counts = bucketed['tweet_count'].sum() if has_counts else bucketed.size()
        agg = counts.reset_index(name='tweet_count')
        # Left-merge onto the full bucket grid so silent buckets appear as 0.
        complete_data = all_minutes.merge(agg, on='interval_group', how='left').fillna({'tweet_count': 0})
        # The merge introduces NaN and therefore floats; counts are integers.
        complete_data['tweet_count'] = complete_data['tweet_count'].astype(int)
        complete_data['date'] = date
        result.append(complete_data)
    return pd.concat(result, ignore_index=True)
# Generate X-axis ticks (EST time)
def generate_xticks(interval):
    """Return tick positions and ``HH:MM`` labels covering one day.

    Args:
        interval: Spacing between ticks, in minutes.

    Returns:
        A ``(ticks, tick_labels)`` tuple: ``ticks`` holds the minute offsets
        ``0, interval, 2 * interval, ...`` below 1440, and ``tick_labels`` the
        matching zero-padded ``HH:MM`` strings.
    """
    ticks = list(range(0, 1440, interval))
    tick_labels = ['{:02d}:{:02d}'.format(*divmod(minute, 60)) for minute in ticks]
    return ticks, tick_labels
# Convert minutes to time string
def minutes_to_time(minutes):
    """Format a minute-of-day offset as a zero-padded ``HH:MM`` string.

    Args:
        minutes: Minutes since midnight. Integral floats (as produced when
            pandas upcasts a column) are accepted and truncated to int.

    Returns:
        The ``HH:MM`` representation, e.g. ``75`` -> ``"01:15"``.
    """
    # int() keeps the ':02d' format spec valid for float and numpy inputs.
    hours, mins = divmod(int(minutes), 60)
    return f"{hours:02d}:{mins:02d}"
# Callback for updating clocks
@app.callback(
    [Output('est-clock', 'children'),
     Output('pst-clock', 'children')],
    [Input('clock-interval', 'n_intervals')]
)
def update_clocks(n):
    """Return the current Eastern and Pacific wall-clock times as text.

    Args:
        n: Tick counter of the ``clock-interval`` component; unused, it only
            triggers the refresh.

    Returns:
        Two strings, for the ``est-clock`` and ``pst-clock`` elements.
    """
    now_utc = datetime.now(pytz.UTC)
    # NOTE(review): the "EST"/"PST" suffixes are literal labels. The times
    # themselves follow daylight saving via the zone objects, so during DST the
    # label says EST/PST while the clock shows EDT/PDT time. Confirm intended.
    est_time = now_utc.astimezone(eastern).strftime('%Y-%m-%d %H:%M:%S EST')
    pst_time = now_utc.astimezone(pacific).strftime('%Y-%m-%d %H:%M:%S PST')
    return f"Current EST Time: {est_time}", f"Current PST Time: {pst_time}"
# Callback for updating multi-day graph, warning, and summary
@app.callback(
    [Output('multi-tweet-graph', 'figure'),
     Output('multi-day-warning', 'children'),
     Output('multi-tweet-summary', 'children')],
    [Input('multi-date-picker', 'value'),
     Input('multi-interval-picker', 'value'),
     Input('time-zone-checklist', 'value')]
)
def update_multi_graph(selected_dates, interval, time_zones):
    """Build the multi-day comparison figure plus its warning and summary text.

    Args:
        selected_dates: ``YYYY-MM-DD`` strings chosen in the date picker; may
            be empty or None when the selection is cleared.
        interval: Bucket width in minutes; None falls back to 10, the
            layout's default.
        time_zones: Subset of ``['PST', 'CST']`` selecting which reference
            lines to draw; may be None.

    Returns:
        ``(figure, warning, summary)``: one line trace per selected day, a
        warning string (empty unless more than 10 days were selected), and the
        total tweet count over the selected days.
    """
    # Cleared inputs can arrive as None; normalise before using them.
    selected_dates = list(selected_dates or [])
    interval = interval or 10
    time_zones = time_zones or []

    if len(selected_dates) > 10:
        selected_dates = selected_dates[:10]
        warning = "Maximum of 10 days can be selected. Showing first 10 selected days."
    else:
        warning = ""

    selected_dates = [datetime.strptime(date, '%Y-%m-%d').date() for date in selected_dates]
    multi_data = agg_df[agg_df['date'].isin(selected_dates)].copy()
    if multi_data.empty:
        # Nothing to aggregate: skip aggregate_data rather than feed it
        # placeholder rows, which would show up as phantom tweets.
        tweet_count_total = 0
        agg_data = pd.DataFrame(columns=['interval_group', 'tweet_count', 'date'])
    else:
        tweet_count_total = multi_data['tweet_count'].sum()
        agg_data = aggregate_data(multi_data, interval)

    # Below 30 minutes, per-bucket ticks would be unreadable; use hourly ticks.
    xticks, xtick_labels = generate_xticks(interval if interval >= 30 else 60)

    fig = go.Figure()
    for date in selected_dates:
        day_data = agg_data[agg_data['date'] == date]
        if day_data.empty:
            # Selected day without any rows: draw a flat zero line so the day
            # still appears in the legend.
            day_data = pd.DataFrame(
                {'interval_group': list(range(0, 1440, interval))}
            ).assign(tweet_count=0)
        hover_times = [f"{date} {minutes_to_time(minute)} EST" for minute in day_data['interval_group']]
        fig.add_trace(go.Scatter(
            x=day_data['interval_group'],
            y=day_data['tweet_count'],
            mode='lines',
            name=str(date),
            customdata=hover_times,
            hovertemplate='%{customdata}<br>Tweets: %{y}<extra></extra>'
        ))

    # Add vertical lines for California (PST) and Texas (CST) times in EST
    # PST is 3 hours behind EST, CST is 1 hour behind EST
    if 'PST' in time_zones:
        pacific_2am_est = (2 + 3) * 60  # 2:00 PST = 5:00 EST (300 minutes)
        pacific_7am_est = (7 + 3) * 60  # 7:00 PST = 10:00 EST (600 minutes)
        fig.add_vline(x=pacific_2am_est, line_dash="dash", line_color="blue", annotation_text="CA 2AM PST")
        fig.add_vline(x=pacific_7am_est, line_dash="dash", line_color="blue", annotation_text="CA 7AM PST")

    if 'CST' in time_zones:
        central_2am_est = (2 + 1) * 60  # 2:00 CST = 3:00 EST (180 minutes)
        central_7am_est = (7 + 1) * 60  # 7:00 CST = 8:00 EST (480 minutes)
        fig.add_vline(x=central_2am_est, line_dash="dash", line_color="green", annotation_text="TX 2AM CST")
        fig.add_vline(x=central_7am_est, line_dash="dash", line_color="green", annotation_text="TX 7AM CST")

    fig.update_layout(
        title=f'Multi-Day Tweet Frequency Comparison (Interval: {interval} minutes, EST)',
        xaxis_title='Eastern Time (HH:MM)',
        yaxis_title='Tweet Count',
        xaxis=dict(range=[0, 1440], tickvals=xticks, ticktext=xtick_labels, tickangle=45),
        height=600,
        showlegend=True
    )

    summary = f"Total tweets for selected dates: {int(tweet_count_total)}"
    return fig, warning, summary
# Run the app
if __name__ == '__main__':
    # Newer Dash releases expose `app.run` and deprecate/remove `run_server`;
    # older ones only provide `run_server`. Prefer `run` when it exists.
    # NOTE: debug=True enables the interactive debugger and hot reload; it is
    # meant for local development, not for a publicly reachable deployment.
    run = getattr(app, 'run', None) or app.run_server
    run(debug=True)