"""Dash dashboard comparing Elon Musk tweet frequency by time of day.

Tweet timestamps are read once from MySQL when the module is imported,
converted to US Eastern local time and counted per (date, minute of day).
The page lets the user overlay up to ten days, choose the bucket width and
toggle reference markers for 2 AM / 7 AM in California and Texas.

The labels "EST", "PST" and "CST" are used loosely throughout the UI for
Eastern, Pacific and Central local time (the pytz zones follow DST).
"""
import os
from datetime import datetime
from urllib.parse import quote_plus

import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import pandas as pd
import plotly.graph_objs as go
import pytz
from sqlalchemy import create_engine

MINUTES_PER_DAY = 1440
MAX_SELECTED_DAYS = 10
DEFAULT_INTERVAL = 10  # minutes; also the initial value of the interval picker

# Database connection configuration.
# Every value can be overridden through the environment.
# NOTE(review): the fallback user/password below are committed in source.
# They are kept only so existing deployments keep working; rotate the
# credential and drop the fallbacks once ELONX_DB_* is set everywhere.
DB_CONFIG = {
    'host': os.environ.get('ELONX_DB_HOST', '8.155.23.172'),
    'port': int(os.environ.get('ELONX_DB_PORT', 3306)),
    'user': os.environ.get('ELONX_DB_USER', 'root2'),
    'password': os.environ.get('ELONX_DB_PASSWORD', 'tG0f6PVYh18le41BCb'),
    'database': os.environ.get('ELONX_DB_NAME', 'elonX'),
}
TABLE_NAME = 'elon_tweets'

# Create database connection using SQLAlchemy.
# User and password are URL-escaped so characters such as '@' or '/' in an
# overridden credential cannot corrupt the connection URL.
db_uri = (
    f"mysql+pymysql://{quote_plus(DB_CONFIG['user'])}:"
    f"{quote_plus(DB_CONFIG['password'])}"
    f"@{DB_CONFIG['host']}:{DB_CONFIG['port']}/{DB_CONFIG['database']}"
)
engine = create_engine(db_uri)

# Load data (once, at import time; restart the app to pick up new tweets).
df = pd.read_sql(f'SELECT timestamp FROM {TABLE_NAME}', con=engine)

# Data preprocessing (Eastern time based)
eastern = pytz.timezone('America/New_York')     # EST
pacific = pytz.timezone('America/Los_Angeles')  # PST
central = pytz.timezone('America/Chicago')      # CST (Texas)

# 'timestamp' is interpreted as Unix seconds (UTC).
df['datetime'] = pd.to_datetime(df['timestamp'], unit='s')
df['datetime_est'] = df['datetime'].dt.tz_localize('UTC').dt.tz_convert(eastern)
df['date'] = df['datetime_est'].dt.date
df['minute_of_day'] = df['datetime_est'].dt.hour * 60 + df['datetime_est'].dt.minute

# One row per (date, minute_of_day) with the number of tweets in that minute.
agg_df = (
    df.groupby(['date', 'minute_of_day'])
    .size()
    .reset_index(name='tweet_count')
)

# Get all dates for selector, sorted from latest to earliest
all_dates = sorted(agg_df['date'].unique(), reverse=True)
# Default to only the most recent day; nothing preselected if the table is empty.
default_date = [str(all_dates[0])] if all_dates else []

# Initialize Dash app
app = dash.Dash(__name__)

# Time interval options
interval_options = [
    {'label': '1 minute', 'value': 1},
    {'label': '5 minutes', 'value': 5},
    {'label': '10 minutes', 'value': 10},
    {'label': '30 minutes', 'value': 30},
    {'label': '60 minutes', 'value': 60},
]

# Dash app layout
app.layout = html.Div([
    html.H1("Elon Musk Tweet Time Analysis (EST)"),
    dcc.Dropdown(
        id='multi-date-picker',
        options=[{'label': str(date), 'value': str(date)} for date in all_dates],
        value=default_date,
        multi=True,
        searchable=True,
        placeholder="Search and select dates (YYYY-MM-DD)",
        style={'width': '100%'},
    ),
    dcc.Dropdown(
        id='multi-interval-picker',
        options=interval_options,
        value=DEFAULT_INTERVAL,  # Default 10 minutes
        style={'width': '50%', 'marginTop': '10px'},
    ),
    html.Div(id='multi-day-warning', style={'color': 'red', 'margin': '10px'}),
    dcc.Checklist(
        id='time-zone-checklist',
        options=[
            {'label': 'California Time (PST)', 'value': 'PST'},
            {'label': 'Texas Time (CST)', 'value': 'CST'},
        ],
        value=['PST'],  # Default show PST, hide CST
        style={'margin': '10px'},
    ),
    html.Div(id='multi-tweet-summary', style={'fontSize': 20, 'margin': '10px'}),
    dcc.Graph(id='multi-tweet-graph'),
    html.Div([
        html.Div(id='est-clock', style={'fontSize': 20, 'margin': '10px'}),
        html.Div(id='pst-clock', style={'fontSize': 20, 'margin': '10px'}),
    ]),
    # Update clocks every second
    dcc.Interval(id='clock-interval', interval=1000, n_intervals=0),
])


# Data aggregation function
def aggregate_data(data, interval):
    """Re-bucket per-minute tweet counts into fixed-width intervals per day.

    Args:
        data: DataFrame with 'date' and 'minute_of_day' columns. When a
            'tweet_count' column is present its values are summed per
            bucket; otherwise each row counts as one tweet.
        interval: Bucket width in minutes (positive integer).

    Returns:
        DataFrame with columns 'interval_group' (bucket start minute),
        'tweet_count' (int) and 'date'. Every date present in ``data`` gets
        one row per bucket of the day, zero-filled where there were no
        tweets. An empty frame with those columns is returned when ``data``
        has no rows.

    Raises:
        ValueError: If ``interval`` is smaller than 1.
    """
    if interval < 1:
        raise ValueError(f"interval must be a positive number of minutes, got {interval}")

    all_minutes = pd.DataFrame(
        {'interval_group': range(0, MINUTES_PER_DAY, interval)}
    )
    # The callers pass rows that are already aggregated per minute, so the
    # per-minute counts must be summed; counting rows would only report how
    # many distinct minutes had activity.
    has_counts = 'tweet_count' in data.columns

    frames = []
    for day, day_rows in data.groupby('date', sort=False):
        bucketed = day_rows.assign(
            interval_group=(day_rows['minute_of_day'] // interval) * interval
        )
        grouped = bucketed.groupby('interval_group')
        if has_counts:
            counts = grouped['tweet_count'].sum().reset_index()
        else:
            counts = grouped.size().reset_index(name='tweet_count')

        complete = all_minutes.merge(counts, on='interval_group', how='left')
        complete['tweet_count'] = complete['tweet_count'].fillna(0).astype(int)
        complete['date'] = day
        frames.append(complete)

    if not frames:
        # pd.concat refuses an empty list; hand back a correctly shaped frame.
        return pd.DataFrame(columns=['interval_group', 'tweet_count', 'date'])
    return pd.concat(frames, ignore_index=True)


# Convert minutes to time string
def minutes_to_time(minutes):
    """Format a minute-of-day offset as a zero-padded 'HH:MM' string."""
    hours, mins = divmod(minutes, 60)
    return f"{hours:02d}:{mins:02d}"


# Generate X-axis ticks (EST time)
def generate_xticks(interval):
    """Return (tick positions, 'HH:MM' labels) spaced ``interval`` minutes apart."""
    ticks = list(range(0, MINUTES_PER_DAY, interval))
    tick_labels = [minutes_to_time(minute) for minute in ticks]
    return ticks, tick_labels


# Callback for updating clocks
@app.callback(
    [Output('est-clock', 'children'),
     Output('pst-clock', 'children')],
    [Input('clock-interval', 'n_intervals')]
)
def update_clocks(n):
    """Render the current Eastern and Pacific wall-clock times.

    Args:
        n: Tick counter of the interval component; only used as a trigger.

    Returns:
        Tuple of two display strings (Eastern clock, Pacific clock).
    """
    now_utc = datetime.now(pytz.UTC)
    est_time = now_utc.astimezone(eastern).strftime('%Y-%m-%d %H:%M:%S EST')
    pst_time = now_utc.astimezone(pacific).strftime('%Y-%m-%d %H:%M:%S PST')
    return f"Current EST Time: {est_time}", f"Current PST Time: {pst_time}"


# Callback for updating multi-day graph, warning, and summary
@app.callback(
    [Output('multi-tweet-graph', 'figure'),
     Output('multi-day-warning', 'children'),
     Output('multi-tweet-summary', 'children')],
    [Input('multi-date-picker', 'value'),
     Input('multi-interval-picker', 'value'),
     Input('time-zone-checklist', 'value')]
)
def update_multi_graph(selected_dates, interval, time_zones):
    """Build the multi-day comparison figure, warning text and summary.

    Args:
        selected_dates: List of 'YYYY-MM-DD' strings from the date picker;
            may be empty or None when the picker is cleared.
        interval: Bucket width in minutes; None when the picker is cleared,
            in which case the default interval is used.
        time_zones: Checked time-zone markers ('PST', 'CST'); may be None.

    Returns:
        Tuple of (plotly figure, warning string, summary string).
    """
    # Cleared widgets deliver None / [] -- normalise instead of crashing.
    selected_dates = list(selected_dates or [])
    interval = interval or DEFAULT_INTERVAL
    time_zones = time_zones or []

    warning = ""
    if len(selected_dates) > MAX_SELECTED_DAYS:
        selected_dates = selected_dates[:MAX_SELECTED_DAYS]
        warning = "Maximum of 10 days can be selected. Showing first 10 selected days."

    selected_dates = [
        datetime.strptime(date, '%Y-%m-%d').date() for date in selected_dates
    ]
    multi_data = agg_df[agg_df['date'].isin(selected_dates)]
    # Sum of an empty column is 0, so no special case is needed.
    tweet_count_total = int(multi_data['tweet_count'].sum())

    agg_data = aggregate_data(multi_data, interval)
    # Axis ticks: at least hourly so labels stay readable for narrow buckets.
    xticks, xtick_labels = generate_xticks(interval if interval >= 30 else 60)

    fig = go.Figure()
    for date in selected_dates:
        day_data = agg_data[agg_data['date'] == date]
        hover_times = [
            f"{date} {minutes_to_time(minute)} EST"
            for minute in day_data['interval_group']
        ]
        fig.add_trace(go.Scatter(
            x=day_data['interval_group'],
            y=day_data['tweet_count'],
            mode='lines',
            name=str(date),
            customdata=hover_times,
            # Plotly hover text needs <br> for a line break.
            hovertemplate='%{customdata}<br>Tweets: %{y}'
        ))

    # Add vertical lines for California (PST) and Texas (CST) times in EST.
    # PST is 3 hours behind EST, CST is 1 hour behind EST.
    zone_markers = (
        # (checklist value, hours behind Eastern, line colour, label prefix)
        ('PST', 3, 'blue', 'CA'),
        ('CST', 1, 'green', 'TX'),
    )
    for zone, hours_behind, colour, prefix in zone_markers:
        if zone not in time_zones:
            continue
        for local_hour in (2, 7):
            # e.g. 2:00 PST = 5:00 EST = minute 300 on the Eastern axis
            fig.add_vline(
                x=(local_hour + hours_behind) * 60,
                line_dash="dash",
                line_color=colour,
                annotation_text=f"{prefix} {local_hour}AM {zone}",
            )

    fig.update_layout(
        title=f'Multi-Day Tweet Frequency Comparison (Interval: {interval} minutes, EST)',
        xaxis_title='Eastern Time (HH:MM)',
        yaxis_title='Tweet Count',
        xaxis=dict(range=[0, MINUTES_PER_DAY], tickvals=xticks,
                   ticktext=xtick_labels, tickangle=45),
        height=600,
        showlegend=True
    )

    summary = f"Total tweets for selected dates: {tweet_count_total}"
    return fig, warning, summary


# Run the app
if __name__ == '__main__':
    # Prefer app.run (current Dash entry point); fall back to the legacy
    # run_server on Dash releases that do not provide it.
    run = getattr(app, 'run', None) or app.run_server
    run(debug=True)