Compare commits

...

10 Commits

Author SHA1 Message Date
NY
8a4aa2c0c0 UI changes 2025-03-06 10:16:59 +08:00
NY
e2ad47a13c dk 2025-03-06 10:14:06 +08:00
NY
de5fdba84a UI changes 2025-03-05 11:48:14 +08:00
NY
0c161934a3 Add one-day posting heatmap.md 2025-03-05 11:21:48 +08:00
NY
3f36d5b781 Add one-day posting heatmap 2025-03-05 11:21:37 +08:00
NY
60bbbd40d0 fix 2025-03-05 11:18:21 +08:00
NY
938aa3ea8d Align real-time clock display 2025-03-05 10:44:38 +08:00
NY
2bf5cf28b7 1. ▶️🔄
2. Restructure the project
2025-03-05 10:24:46 +08:00
NY
31b3a35c18 ▶️🔄 (not yet finished) 2025-03-03 17:42:30 +08:00
NY
ed27ebfa7c typo 2025-03-03 15:53:11 +08:00
23 changed files with 1058 additions and 33541 deletions

View File

@ -1,28 +0,0 @@
# get-tweets
Get a user's tweets without an API key or doing any web scraping. This script uses web requests to get a user's tweets as if you were an unauthenticated user.
Data you get:
* Tweets (including retweets & replies)
* id
* text
* created_at
* retweet_count
* favorite_count
* reply_count
* quote_count
* retweeted
* is_quote_status
* possibly_sensitive
Requirements are:
* requests - To make the http requests to get the tweet data
# Usage
1. Clone the repo
2. `pip install -r requirements.txt`
3. `python get-tweets username --output name_of_file.csv`
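If you want to work with the exported file afterwards, here is a minimal loading sketch. It is only an illustration: it assumes the command above produced a flat CSV named `name_of_file.csv` containing the columns listed earlier.

```python
import pandas as pd

# Assumed output of: python get-tweets username --output name_of_file.csv
df = pd.read_csv("name_of_file.csv")

# Parse timestamps and look at the ten most recent tweets.
df["created_at"] = pd.to_datetime(df["created_at"], errors="coerce")
recent = df.sort_values("created_at", ascending=False).head(10)
print(recent[["created_at", "text", "retweet_count", "favorite_count"]])
```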

View File

@ -1,363 +0,0 @@
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.graph_objs as go
import pandas as pd
import pytz
from datetime import datetime, timedelta
from sqlalchemy import create_engine
from dash import clientside_callback
# Database connection configuration (unchanged)
DB_CONFIG = {
'host': '8.155.23.172',
'port': 3306,
'user': 'root2',
'password': 'tG0f6PVYh18le41BCb',
'database': 'elonX'
}
TABLE_NAME = 'elon_tweets'
db_uri = f"mysql+pymysql://{DB_CONFIG['user']}:{DB_CONFIG['password']}@{DB_CONFIG['host']}:{DB_CONFIG['port']}/{DB_CONFIG['database']}"
engine = create_engine(db_uri)
# Load data (unchanged)
df = pd.read_sql(f'SELECT timestamp FROM {TABLE_NAME}', con=engine)
eastern = pytz.timezone('America/New_York')
pacific = pytz.timezone('America/Los_Angeles')
central = pytz.timezone('America/Chicago')
df['datetime'] = pd.to_datetime(df['timestamp'], unit='s')
df['datetime_est'] = df['datetime'].dt.tz_localize('UTC').dt.tz_convert(eastern)
df['date'] = df['datetime_est'].dt.date
df['minute_of_day'] = df['datetime_est'].dt.hour * 60 + df['datetime_est'].dt.minute
agg_df = df.groupby(['date', 'minute_of_day']).size().reset_index(name='tweet_count')
all_dates = sorted(agg_df['date'].unique(), reverse=True)
default_date = [str(all_dates[0])]
# Initialize Dash app (unchanged)
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
app = dash.Dash(__name__, external_stylesheets=external_stylesheets)
# Time interval and days options (Modified: removed 1 day, added 120 and 240 days)
interval_options = [
{'label': '1 minute', 'value': 1},
{'label': '5 minutes', 'value': 5},
{'label': '10 minutes', 'value': 10},
{'label': '30 minutes', 'value': 30},
{'label': '60 minutes', 'value': 60}
]
days_options = [
{'label': '7 days', 'value': 7},
{'label': '30 days', 'value': 30},
{'label': '90 days', 'value': 90},
{'label': '120 days', 'value': 120},
{'label': '240 days', 'value': 240}
]
# Dash app layout (unchanged except default days value)
app.layout = html.Div([
# Left sidebar with clock button and tooltip (unchanged)
html.Div(
id='clock-container',
children=[
html.Div(
id='clock-button',
children='🕒',
style={
'fontSize': '24px',
'cursor': 'pointer',
'padding': '5px',
}
),
html.Div(
id='clock-tooltip',
children=[
html.Div(id='pst-clock'),
html.Div(id='cst-clock'),
html.Div(id='est-clock')
],
style={
'position': 'absolute',
'left': '35px',
'top': '0px',
'backgroundColor': 'rgba(0, 0, 0, 0.8)',
'color': 'white',
'padding': '10px',
'borderRadius': '5px',
'fontSize': '14px',
'display': 'none',
'whiteSpace': 'nowrap'
}
)
],
style={
'position': 'fixed',
'left': '10px',
'top': '50%',
'transform': 'translateY(-50%)',
'zIndex': 1000
}
),
# Main content
html.Div([
html.H1("Elon Musk Tweet Time Analysis (EST)"),
html.Div(id='date-picker-container', children=[
dcc.Dropdown(
id='multi-date-picker',
options=[{'label': str(date), 'value': str(date)} for date in all_dates],
value=default_date,
multi=True,
searchable=True,
placeholder="Search and select dates (YYYY-MM-DD)",
style={'width': '100%'}
)
]),
dcc.Dropdown(
id='multi-interval-picker',
options=interval_options,
value=10,
style={'width': '50%', 'marginTop': '10px'}
),
html.Div(id='days-display-container', style={'display': 'none'}, children=[
dcc.Dropdown(
id='days-display-picker',
options=days_options,
value=30, # Default changed to 30 since 1 is removed
style={'width': '50%', 'marginTop': '10px'}
)
]),
html.Div(id='multi-day-warning', style={'color': 'red', 'margin': '10px'}),
dcc.Checklist(
id='time-zone-checklist',
options=[
{'label': 'California Time (PST)', 'value': 'PST'},
{'label': 'Texas Time (CST)', 'value': 'CST'}
],
value=['PST'],
style={'margin': '10px'}
),
html.Div(id='multi-tweet-summary', style={'fontSize': '20px', 'margin': '10px'}),
dcc.Tabs(id='tabs', value='line', children=[
dcc.Tab(label='Line Chart', value='line'),
dcc.Tab(label='Heatmap', value='heatmap'),
dcc.Tab(label='Scatter Plot', value='scatter'),
]),
html.Div(id='tabs-content'),
], style={'marginLeft': '50px'}),
dcc.Interval(id='clock-interval', interval=1000, n_intervals=0)
])
# Clientside callback (unchanged)
clientside_callback(
"""
function(n_intervals) {
const container = document.getElementById('clock-container');
const tooltip = document.getElementById('clock-tooltip');
if (container && tooltip) {
container.addEventListener('mouseover', () => {
tooltip.style.display = 'block';
});
container.addEventListener('mouseout', () => {
tooltip.style.display = 'none';
});
}
return window.dash_clientside.no_update;
}
""",
Output('clock-container', 'id'),
Input('clock-interval', 'n_intervals'),
prevent_initial_call=False
)
# Auxiliary functions (unchanged)
def aggregate_data(data, interval):
all_minutes = pd.DataFrame({'interval_group': range(0, 1440, interval)})
result = []
for date in data['date'].unique():
day_data = data[data['date'] == date].copy()
day_data['interval_group'] = (day_data['minute_of_day'] // interval) * interval
agg = day_data.groupby('interval_group').size().reset_index(name='tweet_count')
complete_data = all_minutes.merge(agg, on='interval_group', how='left').fillna({'tweet_count': 0})
complete_data['date'] = date
result.append(complete_data)
return pd.concat(result, ignore_index=True)
def generate_xticks(interval):
if interval <= 5:
tick_step = 60
elif interval <= 10:
tick_step = 60
elif interval <= 30:
tick_step = 120
else:
tick_step = 240
ticks = list(range(0, 1440, tick_step))
tick_labels = [f"{m // 60:02d}:{m % 60:02d}" for m in ticks]
return ticks, tick_labels
def minutes_to_time(minutes):
hours = minutes // 60
mins = minutes % 60
return f"{hours:02d}:{mins:02d}"
# Callback for updating clocks (unchanged)
@app.callback(
[Output('pst-clock', 'children'),
Output('cst-clock', 'children'),
Output('est-clock', 'children')],
[Input('clock-interval', 'n_intervals')]
)
def update_clocks(n):
now_utc = datetime.now(pytz.UTC)
pst_time = now_utc.astimezone(pacific).strftime('%Y-%m-%d %H:%M:%S PST')
cst_time = now_utc.astimezone(central).strftime('%Y-%m-%d %H:%M:%S CST')
est_time = now_utc.astimezone(eastern).strftime('%Y-%m-%d %H:%M:%S EST')
return f"🏛️/🌴: {pst_time}", f"🚀: {cst_time}", f"𝕏: {est_time}"
# Callback for toggling controls visibility (unchanged)
@app.callback(
[Output('date-picker-container', 'style'),
Output('days-display-container', 'style')],
[Input('tabs', 'value')]
)
def toggle_controls_visibility(tab):
if tab == 'heatmap':
return {'display': 'none'}, {'display': 'block'}
return {'display': 'block'}, {'display': 'none'}
# Callback for updating tabs content (Modified to add Thursday-Friday lines)
@app.callback(
[Output('tabs-content', 'children'),
Output('multi-day-warning', 'children'),
Output('multi-tweet-summary', 'children')],
[Input('tabs', 'value'),
Input('multi-date-picker', 'value'),
Input('multi-interval-picker', 'value'),
Input('time-zone-checklist', 'value'),
Input('days-display-picker', 'value')]
)
def render_tab_content(tab, selected_dates, interval, time_zones, days_to_display):
warning = ""
if tab != 'heatmap':
if len(selected_dates) > 10:
selected_dates = selected_dates[:10]
warning = "Maximum of 10 days can be selected. Showing first 10 selected days."
selected_dates = [datetime.strptime(date, '%Y-%m-%d').date() for date in selected_dates]
else:
selected_dates = sorted(all_dates, reverse=True)[:days_to_display]
multi_data_agg = agg_df[agg_df['date'].isin(selected_dates)].copy()
if multi_data_agg.empty:
multi_data_agg = pd.DataFrame({'date': selected_dates, 'minute_of_day': [0] * len(selected_dates)})
tweet_count_total = 0
else:
tweet_count_total = multi_data_agg['tweet_count'].sum()
multi_data_raw = df[df['date'].isin(selected_dates)].copy()
if multi_data_raw.empty:
tweet_count_total = 0
agg_data = aggregate_data(multi_data_agg, interval)
xticks, xtick_labels = generate_xticks(interval)
if tab == 'line':
fig = go.Figure()
for date in selected_dates:
day_data = agg_data[agg_data['date'] == date]
hover_times = [f"{date} {minutes_to_time(minute)} EST" for minute in day_data['interval_group']]
fig.add_trace(go.Scatter(
x=day_data['interval_group'],
y=day_data['tweet_count'],
mode='lines',
name=str(date),
customdata=hover_times,
hovertemplate='%{customdata}<br>Tweets: %{y}<extra></extra>'
))
elif tab == 'heatmap':
pivot_data = agg_data.pivot(index='date', columns='interval_group', values='tweet_count').fillna(0)
pivot_data.index = pivot_data.index.astype(str)
fig = go.Figure(data=go.Heatmap(
z=pivot_data.values,
x=[minutes_to_time(m) for m in pivot_data.columns],
y=pivot_data.index,
colorscale='Viridis',
hoverongaps=False,
hovertemplate='%{y} %{x} EST<br>Tweets: %{z}<extra></extra>'
))
for i, date_str in enumerate(pivot_data.index):
date = datetime.strptime(date_str, '%Y-%m-%d').date()
if date.weekday() == 4: # Friday
prev_date = date - timedelta(days=1)
if str(prev_date) in pivot_data.index:
y_position = i / len(pivot_data.index)
fig.add_hline(
y=1-y_position,
line_dash="dash",
line_color="white",
xref="x",
yref="paper"
)
fig.update_layout(
title = f'Tweet Heatmap (Interval: {interval} minutes, EST, {len(selected_dates)} days)',
xaxis_title = 'Time of Day (HH:MM EST)',
yaxis_title = 'Date',
height = max(400, len(selected_dates) * 20),
yaxis = dict(autorange='reversed')
)
elif tab == 'scatter':
fig = go.Figure()
for date in selected_dates:
day_data = multi_data_raw[multi_data_raw['date'] == date]
if not day_data.empty:
hover_times = [t.strftime('%Y-%m-%d %H:%M:%S EST') for t in day_data['datetime_est']]
fig.add_trace(go.Scatter(
x=day_data['minute_of_day'],
y=[str(date)] * len(day_data),
mode='markers',
name=str(date),
customdata=hover_times,
hovertemplate='%{customdata}<extra></extra>',
marker=dict(size=8)
))
if tab in ['line', 'scatter']:
if 'PST' in time_zones:
pacific_2am_est = (2 + 3) * 60
pacific_7am_est = (7 + 3) * 60
fig.add_vline(x=pacific_2am_est, line_dash="dash", line_color="blue", annotation_text="CA 2AM PST")
fig.add_vline(x=pacific_7am_est, line_dash="dash", line_color="blue", annotation_text="CA 7AM PST")
if 'CST' in time_zones:
central_2am_est = (2 + 1) * 60
central_7am_est = (7 + 1) * 60
fig.add_vline(x=central_2am_est, line_dash="dash", line_color="green", annotation_text="TX 2AM CST")
fig.add_vline(x=central_7am_est, line_dash="dash", line_color="green", annotation_text="TX 7AM CST")
if tab in ['line', 'scatter']:
fig.update_layout(
title=f'{"Line" if tab == "line" else "Scatter"} Tweet Frequency (Interval: {interval} minutes, EST)',
xaxis_title='Eastern Time (HH:MM)',
yaxis_title='Tweet Count' if tab == 'line' else 'Date',
xaxis=dict(range=[0, 1440], tickvals=xticks, ticktext=xtick_labels, tickangle=45),
height=600,
showlegend=True,
yaxis=dict(autorange='reversed') if tab == 'scatter' else None
)
summary = f"Total tweets for selected dates: {int(tweet_count_total)}"
return dcc.Graph(figure=fig), warning, summary
# Run the app
if __name__ == '__main__':
app.run_server(debug=True)

415
assets/bWLwgP.css Normal file
View File

@ -0,0 +1,415 @@
/* Table of contents
- Plotly.js
- Grid
- Base Styles
- Typography
- Links
- Buttons
- Forms
- Lists
- Code
- Tables
- Spacing
- Utilities
- Clearing
- Media Queries
*/
/* Plotly.js
*/
/* plotly.js's modebar's z-index is 1001 by default
* https://github.com/plotly/plotly.js/blob/7e4d8ab164258f6bd48be56589dacd9bdd7fded2/src/css/_modebar.scss#L5
* In case a dropdown is above the graph, the dropdown's options
* will be rendered below the modebar
* Increase the select option's z-index
*/
/* This was actually not quite right -
dropdowns were overlapping each other (edited October 26)
.Select {
z-index: 1002;
}*/
/* Grid
*/
.container {
position: relative;
width: 100%;
max-width: 960px;
margin: 0 auto;
padding: 0 20px;
box-sizing: border-box; }
.column,
.columns {
width: 100%;
float: left;
box-sizing: border-box; }
/* For devices larger than 400px */
@media (min-width: 400px) {
.container {
width: 85%;
padding: 0; }
}
/* For devices larger than 550px */
@media (min-width: 550px) {
.container {
width: 80%; }
.column,
.columns {
margin-left: 4%; }
.column:first-child,
.columns:first-child {
margin-left: 0; }
.one.column,
.one.columns { width: 4.66666666667%; }
.two.columns { width: 13.3333333333%; }
.three.columns { width: 22%; }
.four.columns { width: 30.6666666667%; }
.five.columns { width: 39.3333333333%; }
.six.columns { width: 48%; }
.seven.columns { width: 56.6666666667%; }
.eight.columns { width: 65.3333333333%; }
.nine.columns { width: 74.0%; }
.ten.columns { width: 82.6666666667%; }
.eleven.columns { width: 91.3333333333%; }
.twelve.columns { width: 100%; margin-left: 0; }
.one-third.column { width: 30.6666666667%; }
.two-thirds.column { width: 65.3333333333%; }
.one-half.column { width: 48%; }
/* Offsets */
.offset-by-one.column,
.offset-by-one.columns { margin-left: 8.66666666667%; }
.offset-by-two.column,
.offset-by-two.columns { margin-left: 17.3333333333%; }
.offset-by-three.column,
.offset-by-three.columns { margin-left: 26%; }
.offset-by-four.column,
.offset-by-four.columns { margin-left: 34.6666666667%; }
.offset-by-five.column,
.offset-by-five.columns { margin-left: 43.3333333333%; }
.offset-by-six.column,
.offset-by-six.columns { margin-left: 52%; }
.offset-by-seven.column,
.offset-by-seven.columns { margin-left: 60.6666666667%; }
.offset-by-eight.column,
.offset-by-eight.columns { margin-left: 69.3333333333%; }
.offset-by-nine.column,
.offset-by-nine.columns { margin-left: 78.0%; }
.offset-by-ten.column,
.offset-by-ten.columns { margin-left: 86.6666666667%; }
.offset-by-eleven.column,
.offset-by-eleven.columns { margin-left: 95.3333333333%; }
.offset-by-one-third.column,
.offset-by-one-third.columns { margin-left: 34.6666666667%; }
.offset-by-two-thirds.column,
.offset-by-two-thirds.columns { margin-left: 69.3333333333%; }
.offset-by-one-half.column,
.offset-by-one-half.columns { margin-left: 52%; }
}
/* Base Styles
*/
/* NOTE
html is set to 62.5% so that all the REM measurements throughout Skeleton
are based on 10px sizing. So basically 1.5rem = 15px :) */
html {
font-size: 62.5%; }
body {
font-size: 1.5em; /* currently ems cause chrome bug misinterpreting rems on body element */
line-height: 1.6;
font-weight: 400;
font-family: "Open Sans", "HelveticaNeue", "Helvetica Neue", Helvetica, Arial, sans-serif;
color: rgb(50, 50, 50); }
/* Typography
*/
h1, h2, h3, h4, h5, h6 {
margin-top: 0;
margin-bottom: 0;
font-weight: 300; }
h1 { font-size: 4.5rem; line-height: 1.2; letter-spacing: -.1rem; margin-bottom: 2rem; }
h2 { font-size: 3.6rem; line-height: 1.25; letter-spacing: -.1rem; margin-bottom: 1.8rem; margin-top: 1.8rem;}
h3 { font-size: 3.0rem; line-height: 1.3; letter-spacing: -.1rem; margin-bottom: 1.5rem; margin-top: 1.5rem;}
h4 { font-size: 2.6rem; line-height: 1.35; letter-spacing: -.08rem; margin-bottom: 1.2rem; margin-top: 1.2rem;}
h5 { font-size: 2.2rem; line-height: 1.5; letter-spacing: -.05rem; margin-bottom: 0.6rem; margin-top: 0.6rem;}
h6 { font-size: 2.0rem; line-height: 1.6; letter-spacing: 0; margin-bottom: 0.75rem; margin-top: 0.75rem;}
p {
margin-top: 0; }
/* Blockquotes
*/
blockquote {
border-left: 4px lightgrey solid;
padding-left: 1rem;
margin-top: 2rem;
margin-bottom: 2rem;
margin-left: 0rem;
}
/* Links
*/
a {
color: #1EAEDB;
text-decoration: underline;
cursor: pointer;}
a:hover {
color: #0FA0CE; }
/* Buttons
*/
.button,
button,
input[type="submit"],
input[type="reset"],
input[type="button"] {
display: inline-block;
height: 38px;
padding: 0 30px;
color: #555;
text-align: center;
font-size: 11px;
font-weight: 600;
line-height: 38px;
letter-spacing: .1rem;
text-transform: uppercase;
text-decoration: none;
white-space: nowrap;
background-color: transparent;
border-radius: 4px;
border: 1px solid #bbb;
cursor: pointer;
box-sizing: border-box; }
.button:hover,
button:hover,
input[type="submit"]:hover,
input[type="reset"]:hover,
input[type="button"]:hover,
.button:focus,
button:focus,
input[type="submit"]:focus,
input[type="reset"]:focus,
input[type="button"]:focus {
color: #333;
border-color: #888;
outline: 0; }
.button.button-primary,
button.button-primary,
input[type="submit"].button-primary,
input[type="reset"].button-primary,
input[type="button"].button-primary {
color: #FFF;
background-color: #33C3F0;
border-color: #33C3F0; }
.button.button-primary:hover,
button.button-primary:hover,
input[type="submit"].button-primary:hover,
input[type="reset"].button-primary:hover,
input[type="button"].button-primary:hover,
.button.button-primary:focus,
button.button-primary:focus,
input[type="submit"].button-primary:focus,
input[type="reset"].button-primary:focus,
input[type="button"].button-primary:focus {
color: #FFF;
background-color: #1EAEDB;
border-color: #1EAEDB; }
/* Forms
*/
input[type="email"],
input[type="number"],
input[type="search"],
input[type="text"],
input[type="tel"],
input[type="url"],
input[type="password"],
textarea,
select {
height: 38px;
padding: 6px 10px; /* The 6px vertically centers text on FF, ignored by Webkit */
background-color: #fff;
border: 1px solid #D1D1D1;
border-radius: 4px;
box-shadow: none;
box-sizing: border-box;
font-family: inherit;
font-size: inherit; /*https://stackoverflow.com/questions/6080413/why-doesnt-input-inherit-the-font-from-body*/}
/* Removes awkward default styles on some inputs for iOS */
input[type="email"],
input[type="number"],
input[type="search"],
input[type="text"],
input[type="tel"],
input[type="url"],
input[type="password"],
textarea {
-webkit-appearance: none;
-moz-appearance: none;
appearance: none; }
textarea {
min-height: 65px;
padding-top: 6px;
padding-bottom: 6px; }
input[type="email"]:focus,
input[type="number"]:focus,
input[type="search"]:focus,
input[type="text"]:focus,
input[type="tel"]:focus,
input[type="url"]:focus,
input[type="password"]:focus,
textarea:focus,
select:focus {
border: 1px solid #33C3F0;
outline: 0; }
label,
legend {
display: block;
margin-bottom: 0px; }
fieldset {
padding: 0;
border-width: 0; }
input[type="checkbox"],
input[type="radio"] {
display: inline; }
label > .label-body {
display: inline-block;
margin-left: .5rem;
font-weight: normal; }
/* Lists
*/
ul {
list-style: circle inside; }
ol {
list-style: decimal inside; }
ol, ul {
padding-left: 0;
margin-top: 0; }
ul ul,
ul ol,
ol ol,
ol ul {
margin: 1.5rem 0 1.5rem 3rem;
font-size: 90%; }
li {
margin-bottom: 1rem; }
/* Tables
*/
table {
border-collapse: collapse;
}
th:not(.CalendarDay),
td:not(.CalendarDay) {
padding: 12px 15px;
text-align: left;
border-bottom: 1px solid #E1E1E1; }
th:first-child:not(.CalendarDay),
td:first-child:not(.CalendarDay) {
padding-left: 0; }
th:last-child:not(.CalendarDay),
td:last-child:not(.CalendarDay) {
padding-right: 0; }
/* Spacing
*/
button,
.button {
margin-bottom: 0rem; }
input,
textarea,
select,
fieldset {
margin-bottom: 0rem; }
pre,
dl,
figure,
table,
form {
margin-bottom: 0rem; }
p,
ul,
ol {
margin-bottom: 0.75rem; }
/* Utilities
*/
.u-full-width {
width: 100%;
box-sizing: border-box; }
.u-max-full-width {
max-width: 100%;
box-sizing: border-box; }
.u-pull-right {
float: right; }
.u-pull-left {
float: left; }
/* Misc
*/
hr {
margin-top: 3rem;
margin-bottom: 3.5rem;
border-width: 0;
border-top: 1px solid #E1E1E1; }
/* Clearing
*/
/* Self Clearing Goodness */
.container:after,
.row:after,
.u-cf {
content: "";
display: table;
clear: both; }
/* Media Queries
*/
/*
Note: The best way to structure the use of media queries is to create the queries
near the relevant code. For example, if you wanted to change the styles for buttons
on small devices, paste the mobile query code up in the buttons section and style it
there.
*/
/* Larger than mobile */
@media (min-width: 400px) {}
/* Larger than phablet (also point when grid becomes active) */
@media (min-width: 550px) {}
/* Larger than tablet */
@media (min-width: 750px) {}
/* Larger than desktop */
@media (min-width: 1000px) {}
/* Larger than Desktop HD */
@media (min-width: 1200px) {}

BIN
assets/elon.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 25 KiB

20279
elonmusk.csv

File diff suppressed because it is too large Load Diff

12749
fixed.csv

File diff suppressed because it is too large Load Diff

5
main.py Normal file
View File

@ -0,0 +1,5 @@
from pkg.dash.app_init import app
# Run the app
if __name__ == '__main__':
app.run_server(debug=True)

View File

@ -184,15 +184,15 @@ def update_input_scrollbar():
def save_history_to_file():
"""Save run history to a text file"""
with open("run_history.txt", "w") as f:
with open("cache/run_history.txt", "w") as f:
json.dump(run_history, f, indent=4)
def load_history_from_file():
"""Load run history from a text file"""
global run_history
if os.path.exists("run_history.txt"):
with open("run_history.txt", "r") as f:
if os.path.exists("cache/run_history.txt"):
with open("cache/run_history.txt", "r") as f:
run_history = json.load(f)
else:
run_history = []

62
pkg/config.py Normal file
View File

@ -0,0 +1,62 @@
from sqlalchemy import create_engine
import pandas as pd
import pytz
# Database connection configuration
DB_CONFIG = {
'host': '8.155.23.172',
'port': 3306,
'user': 'root2',
'password': 'tG0f6PVYh18le41BCb',
'database': 'elonX'
}
TABLE_NAME = 'elon_tweets'
db_uri = f"mysql+pymysql://{DB_CONFIG['user']}:{DB_CONFIG['password']}@{DB_CONFIG['host']}:{DB_CONFIG['port']}/{DB_CONFIG['database']}"
engine = create_engine(db_uri)
# Time zone
eastern = pytz.timezone('America/New_York')
pacific = pytz.timezone('America/Los_Angeles')
central = pytz.timezone('America/Chicago')
# CSV update file path
INPUT_FILE = 'cache/elonmusk.csv'
OUTPUT_FILE = 'cache/fixed.csv'
# Time interval and days options
interval_options = [
{'label': '1 minute', 'value': 1},
{'label': '5 minutes', 'value': 5},
{'label': '10 minutes', 'value': 10},
{'label': '30 minutes', 'value': 30},
{'label': '60 minutes', 'value': 60}
]
days_options = [
{'label': '7 days', 'value': 7},
{'label': '30 days', 'value': 30},
{'label': '90 days', 'value': 90},
{'label': '120 days', 'value': 120},
{'label': '240 days', 'value': 240}
]
# Global render data
class RenderData:
def __init__(self):
self.global_df = None
self.global_agg_df = None
self.all_dates = []
self.default_date = []
self.load_data()
def load_data(self):
df = pd.read_sql(f'SELECT timestamp FROM {TABLE_NAME}', con=engine)
df['datetime'] = pd.to_datetime(df['timestamp'], unit='s')
df['datetime_est'] = df['datetime'].dt.tz_localize('UTC').dt.tz_convert(eastern)
df['date'] = df['datetime_est'].dt.date
df['minute_of_day'] = df['datetime_est'].dt.hour * 60 + df['datetime_est'].dt.minute
agg_df = df.groupby(['date', 'minute_of_day']).size().reset_index(name='tweet_count')
self.global_df = df.copy()
self.global_agg_df = agg_df.copy()
self.all_dates = sorted(self.global_agg_df['date'].unique(), reverse=True)
self.default_date = [str(self.all_dates[0])]
render_data = RenderData()
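For orientation, a small sketch of how the shared `render_data` instance defined above can be used from other modules; the printed values are illustrative, not taken from the repository.

```python
# Minimal usage sketch (run inside the project, with the database reachable).
from pkg.config import render_data

print(render_data.all_dates[:3])   # most recent dates first
print(render_data.default_date)    # e.g. ['2025-03-06'] (illustrative)
render_data.load_data()            # re-query MySQL and refresh the cached frames
```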

0
pkg/dash/__init__.py Normal file
View File

0
pkg/dash/api/__init__.py Normal file
View File

View File

@ -0,0 +1,14 @@
from pkg.dash.app_init import app
from pkg.get_tweets import process_tweets
from flask import jsonify
from pkg.config import render_data
# Backend endpoint that refreshes the tweet data (called by the play button's client-side callback)
@app.server.route('/api/process_tweets', methods=['GET'])
def api_process_tweets():
success, message = process_tweets()
if success:
render_data.load_data()
return jsonify({'message': message, 'default_date': render_data.default_date}), 200
else:
return jsonify({'error': message}), 500
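A small sketch of calling this endpoint once the Dash server is running. The address is an assumption (Dash's default development server on 127.0.0.1:8050), not taken from the repo; adjust host and port to your deployment.

```python
import requests

# Assumed local address of the running Dash/Flask server.
resp = requests.get("http://127.0.0.1:8050/api/process_tweets", timeout=300)
data = resp.json()
if resp.ok:
    print("Refreshed:", data.get("message"), "default date:", data.get("default_date"))
else:
    print("Refresh failed:", data.get("error"))
```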

138
pkg/dash/app_html.py Normal file
View File

@ -0,0 +1,138 @@
from dash import dcc, html
from pkg.config import interval_options, days_options, render_data
def layout_config(app):
# Dash app layout (unchanged except default days value)
app.layout = html.Div([
# Left sidebar with clock button and tooltip (unchanged)
html.Div(
id='clock-container',
children=[
html.Div(
id='clock-button',
children='🕒',
style={
'fontSize': '24px',
'cursor': 'pointer',
'padding': '5px',
}
),
html.Div(
id='clock-tooltip',
children=[
html.Div(id='pst-clock')
],
style={
'position': 'absolute',
'left': '35px',
'top': '0px',
'backgroundColor': 'rgba(0, 0, 0, 0.8)',
'color': 'white',
'padding': '10px',
'borderRadius': '5px',
'fontSize': '14px',
'display': 'none',
'whiteSpace': 'nowrap'
}
),
html.Div(
id='play-button',
children='▶️',
n_clicks=0,
style={
'fontSize': '24px',
'cursor': 'pointer',
'padding': '5px',
'marginTop': '10px'
}
),
html.Div(
id='play-tooltip',
children=[],
style={
'position': 'absolute',
'left': '35px',
'top': '40px',
'backgroundColor': 'rgba(0, 0, 0, 0.8)',
'color': 'white',
'padding': '10px',
'borderRadius': '5px',
'fontSize': '14px',
'display': 'none',
'whiteSpace': 'nowrap'
}
),
html.A(
href='https://x.com/elonmusk',
children=[
html.Img(
src='https://pbs.twimg.com/profile_images/1893803697185910784/Na5lOWi5_400x400.jpg',
style={
'height': '24px', # Matches fontSize of other buttons
'width': '24px',
'cursor': 'pointer',
'padding': '5px',
'marginTop': '10px'
}
)
]
)
],
style={
'position': 'fixed',
'left': '10px',
'top': '50%',
'transform': 'translateY(-50%)',
'zIndex': 1000
}
),
# Main content
html.Div([
html.H1("Elon Musk Tweet Time Analysis (EST)"),
html.Div(id='date-picker-container', children=[
dcc.Dropdown(
id='multi-date-picker',
options=[{'label': str(date), 'value': str(date)} for date in render_data.all_dates],
value=render_data.default_date,
multi=True,
searchable=True,
placeholder="Search and select dates (YYYY-MM-DD)",
style={'width': '100%'}
)
]),
dcc.Dropdown(
id='multi-interval-picker',
options=interval_options,
value=10,
style={'width': '50%', 'marginTop': '10px'}
),
html.Div(id='days-display-container', style={'display': 'none'}, children=[
dcc.Dropdown(
id='days-display-picker',
options=days_options,
value=30, # Default changed to 30 since 1 is removed
style={'width': '50%', 'marginTop': '10px'}
)
]),
html.Div(id='multi-day-warning', style={'color': 'red', 'margin': '10px'}),
dcc.Checklist(
id='time-zone-checklist',
options=[
{'label': 'California Time (PST)', 'value': 'PST'},
{'label': 'Texas Time (CST)', 'value': 'CST'}
],
value=['PST'],
style={'margin': '10px'}
),
html.Div(id='multi-tweet-summary', style={'fontSize': '20px', 'margin': '10px'}),
dcc.Tabs(id='tabs', value='line', children=[
dcc.Tab(label='Line', value='line'),
dcc.Tab(label='Heatmap', value='heatmap'),
dcc.Tab(label='Heatmap(1-day)', value='one_day_heatmap'),
]),
html.Div(id='tabs-content'),
], style={'marginLeft': '50px'}),
dcc.Interval(id='clock-interval', interval=1000, n_intervals=0)
])
return app

24
pkg/dash/app_init.py Normal file
View File

@ -0,0 +1,24 @@
import dash
from .app_html import layout_config
from .javascript import setting_callback
import os
import importlib
external_stylesheets = ['assets/bWLwgP.css']
app = dash.Dash(__name__, external_stylesheets=external_stylesheets)
app = layout_config(app)
setting_callback()
base_dir = os.path.dirname(__file__)
sub_dirs = ['func', 'api']
for sub_dir in sub_dirs:
dir_path = os.path.join(base_dir, sub_dir)
if os.path.exists(dir_path):
for filename in os.listdir(dir_path):
if filename.endswith('.py') and filename != '__init__.py':
module_name = filename[:-3]
importlib.import_module(f'.{sub_dir}.{module_name}', package='pkg.dash')

View File

29
pkg/dash/func/clock.py Normal file
View File

@ -0,0 +1,29 @@
import pytz
from pkg.dash.app_init import app
from dash.dependencies import Input, Output
from pkg.config import pacific,central,eastern
from datetime import datetime
from dash import html
@app.callback(
[Output('pst-clock', 'children')],
[Input('clock-interval', 'n_intervals')]
)
def update_clocks(n):
now_utc = datetime.now(pytz.UTC)
pst_time = now_utc.astimezone(pacific).strftime('%Y-%m-%d %H:%M:%S PST')
cst_time = now_utc.astimezone(central).strftime('%Y-%m-%d %H:%M:%S CST')
est_time = now_utc.astimezone(eastern).strftime('%Y-%m-%d %H:%M:%S EST')
table = html.Table([
html.Tr([html.Td('𝕏', style={'textAlign': 'center'}), html.Td(pst_time)]),
html.Tr([html.Td('🚀', style={'textAlign': 'center'}), html.Td(cst_time)]),
html.Tr([html.Td('🏛️🌴', style={'textAlign': 'center'}), html.Td(est_time)])
], style={
'width': '100%',
'textAlign': 'left',
'borderCollapse': 'collapse'
})
return [table]

141
pkg/dash/func/render.py Normal file
View File

@ -0,0 +1,141 @@
from datetime import datetime, timedelta
from dash.dependencies import Input, Output
from pkg.dash.app_init import app
from pkg.config import render_data
from pkg.tool import aggregate_data, generate_xticks, minutes_to_time
from dash import dcc
import plotly.graph_objs as go
import pandas as pd
@app.callback(
[Output('tabs-content', 'children'),
Output('multi-day-warning', 'children'),
Output('multi-tweet-summary', 'children')],
[Input('tabs', 'value'),
Input('multi-date-picker', 'value'),
Input('multi-interval-picker', 'value'),
Input('time-zone-checklist', 'value'),
Input('days-display-picker', 'value')]
)
def render_tab_content(tab, selected_dates, interval, time_zones, days_to_display):
warning = ""
if tab == 'line':
if not selected_dates: # Handle None or empty list
selected_dates = [datetime.now().date()] # Default to today
warning = "No dates selected. Showing todays data."
if len(selected_dates) > 10:
selected_dates = selected_dates[:10]
warning = "Maximum of 10 days can be selected. Showing first 10 selected days."
selected_dates = [datetime.strptime(date, '%Y-%m-%d').date() for date in selected_dates]
else:
available_dates = sorted(render_data.global_agg_df['date'].unique(), reverse=True)
selected_dates = available_dates[:days_to_display] if available_dates else [datetime.now().date()]
if not available_dates:
warning = "No data available. Showing todays date with zero tweets."
multi_data_agg = render_data.global_agg_df[render_data.global_agg_df['date'].isin(selected_dates)].copy()
if multi_data_agg.empty:
multi_data_agg = pd.DataFrame({
'date': selected_dates,
'minute_of_day': [0] * len(selected_dates),
})
tweet_count_total = multi_data_agg.get('tweet_count', pd.Series([0] * len(multi_data_agg))).sum()
multi_data_raw = render_data.global_df[render_data.global_df['date'].isin(selected_dates)].copy()
if multi_data_raw.empty:
tweet_count_total = 0
agg_data = aggregate_data(multi_data_agg, interval)
xticks, xtick_labels = generate_xticks(interval)
if tab == 'line':
fig = go.Figure()
for date in selected_dates:
day_data = agg_data[agg_data['date'] == date]
hover_times = [f"{date} {minutes_to_time(minute)} EST" for minute in day_data['interval_group']]
fig.add_trace(go.Scatter(
x=day_data['interval_group'],
y=day_data['tweet_count'],
mode='lines',
name=str(date),
customdata=hover_times,
hovertemplate='%{customdata}<br>Tweets: %{y}<extra></extra>'
))
elif tab == 'heatmap':
pivot_data = agg_data.pivot(index='date', columns='interval_group', values='tweet_count').fillna(0)
pivot_data.index = pivot_data.index.astype(str)
fig = go.Figure(data=go.Heatmap(
z=pivot_data.values,
x=[minutes_to_time(m) for m in pivot_data.columns],
y=pivot_data.index,
colorscale='Viridis',
hoverongaps=False,
hovertemplate='%{y} %{x} EST<br>Tweets: %{z}<extra></extra>'
))
for i, date_str in enumerate(pivot_data.index):
date = datetime.strptime(date_str, '%Y-%m-%d').date()
if date.weekday() == 4: # Friday
prev_date = date - timedelta(days=1)
if str(prev_date) in pivot_data.index:
y_position = i / len(pivot_data.index)
fig.add_hline(
y=1 - y_position,
line_dash="dash",
line_color="white",
xref="x",
yref="paper"
)
fig.update_layout(
title=f'Tweet Heatmap (Interval: {interval} minutes, EST, {len(selected_dates)} days)',
xaxis_title='Time of Day (HH:MM EST)',
yaxis_title='Date',
height=max(400, len(selected_dates) * 20),
yaxis=dict(autorange='reversed')
)
elif tab == 'one_day_heatmap':
one_day_data = agg_data.groupby('interval_group')['tweet_count'].sum().reset_index()
hours = list(range(24))
intervals_per_hour = 60 // interval
z_values = [[0] * intervals_per_hour for _ in range(24)]
for _, row in one_day_data.iterrows():
minute = row['interval_group']
hour = int(minute // 60) # Convert to integer
interval_idx = int((minute % 60) // interval) # Convert to integer
if hour < 24:
z_values[hour][interval_idx] = row['tweet_count']
x_labels = [f"{i * interval:02d}" for i in range(intervals_per_hour)]
fig = go.Figure(data=go.Heatmap(
z=z_values,
x=x_labels,
y=[f"{h:02d}" for h in hours],
colorscale='Viridis',
hoverongaps=False,
hovertemplate='%{y}:%{x} EST<br>Tweets: %{z}<extra></extra>'
))
if tab in ['line', 'one_day_heatmap']:
fig.update_layout(
title=f'{"Line" if tab == "line" else "One-Day Heatmap"} Tweet Frequency (Interval: {interval} minutes, EST, {len(selected_dates)} days)',
xaxis_title='Minutes' if tab == 'one_day_heatmap' else 'Eastern Time (HH:MM)',
yaxis_title='Hour of Day' if tab == 'one_day_heatmap' else 'Tweet Count',
xaxis=dict(
range=[0, 1440] if tab == 'line' else None,
tickvals=xticks if tab == 'line' else None,
ticktext=xtick_labels if tab == 'line' else None,
tickangle=45 if tab == 'line' else 0
),
height=600,
showlegend=(tab == 'line'),
yaxis=dict(autorange='reversed') if tab == 'one_day_heatmap' else None
)
summary = f"Total tweets for selected dates: {int(tweet_count_total)}"
return dcc.Graph(figure=fig), warning, summary

14
pkg/dash/func/ui.py Normal file
View File

@ -0,0 +1,14 @@
from pkg.dash.app_init import app
from dash.dependencies import Input, Output
@app.callback(
[Output('date-picker-container', 'style'),
Output('days-display-container', 'style'),
Output('time-zone-checklist', 'style')],
[Input('tabs', 'value')]
)
def toggle_controls_visibility(tab):
if tab == 'heatmap' or tab == 'one_day_heatmap':
return {'display': 'none'}, {'display': 'block'}, {'display': 'none'}
return {'display': 'block'}, {'display': 'none'}, {'display': 'block'}

77
pkg/dash/javascript.py Normal file
View File

@ -0,0 +1,77 @@
from dash.dependencies import Input, Output, State
from dash import clientside_callback
def setting_callback():
clientside_callback(
"""
function(n_intervals) {
const button = document.getElementById('clock-button');
const tooltip = document.getElementById('clock-tooltip');
if (button && tooltip) {
button.addEventListener('mouseover', () => {
tooltip.style.display = 'block';
});
button.addEventListener('mouseout', () => {
tooltip.style.display = 'none';
});
}
return window.dash_clientside.no_update;
}
""",
Output('clock-container', 'id'),
Input('clock-interval', 'n_intervals'),
prevent_initial_call=False
)
# Clientside callback for play button with API request
clientside_callback(
"""
async function(n_clicks, existing_children) {
const button = document.getElementById('play-button');
const tooltip = document.getElementById('play-tooltip');
if (!button || !tooltip) return ['▶️', ''];
if (n_clicks > 0) {
button.style.cursor = 'wait';
button.innerHTML = '🔄';
tooltip.style.display = 'none';
try {
const response = await fetch('/api/process_tweets', {
method: 'GET',
headers: {'Content-Type': 'application/json'}
});
const data = await response.json();
if (response.ok) {
button.innerHTML = '';
tooltip.innerHTML = `Success: ${data.message}`;
tooltip.style.display = 'block';
} else {
button.innerHTML = '🆘';
tooltip.innerHTML = `Error: ${data.error}`;
tooltip.style.display = 'block';
}
} catch (error) {
button.innerHTML = '🆘';
tooltip.innerHTML = `Error: Network failure - ${error.message}`;
tooltip.style.display = 'block';
}
setTimeout(() => {
button.innerHTML = '▶️';
button.style.cursor = 'pointer';
tooltip.style.display = 'none';
tooltip.innerHTML = '';
}, 3000);
return [button.innerHTML, tooltip.innerHTML];
}
return ['▶️', ''];
}
""",
[Output('play-button', 'children'), Output('play-tooltip', 'children')],
Input('play-button', 'n_clicks'),
State('play-button', 'children'),
prevent_initial_call=True
)

View File

@ -1,59 +1,33 @@
import csv
import re
import mysql.connector
from datetime import datetime
import requests
# File paths
INPUT_FILE = '../elonmusk.csv'
OUTPUT_FILE = '../fixed.csv'
# Database connection configuration
DB_CONFIG = {
'host': '8.155.23.172',
'port': 3306,
'user': 'root2',
'password': 'tG0f6PVYh18le41BCb',
'database': 'elonX'
}
TABLE_NAME = 'elon_tweets'
from datetime import datetime
from pkg.config import TABLE_NAME,DB_CONFIG,INPUT_FILE,OUTPUT_FILE
def download_file(file_path):
url = 'https://www.xtracker.io/api/download'
headers = {
'Accept': '*/*',
'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6,zh-TW;q=0.5',
'Cache-Control': 'no-cache',
'Connection': 'keep-alive',
'Content-Type': 'application/json',
'Origin': 'https://www.xtracker.io',
'Pragma': 'no-cache',
'Referer': 'https://www.xtracker.io/',
'Sec-Fetch-Dest': 'empty',
'Sec-Fetch-Mode': 'cors',
'Sec-Fetch-Site': 'same-origin',
'Accept': '*/*', 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6,zh-TW;q=0.5',
'Cache-Control': 'no-cache', 'Connection': 'keep-alive', 'Content-Type': 'application/json',
'Origin': 'https://www.xtracker.io', 'Pragma': 'no-cache', 'Referer': 'https://www.xtracker.io/',
'Sec-Fetch-Dest': 'empty', 'Sec-Fetch-Mode': 'cors', 'Sec-Fetch-Site': 'same-origin',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36 Edg/133.0.0.0',
'sec-ch-ua': '"Not(A:Brand";v="99", "Microsoft Edge";v="133", "Chromium";v="133"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua': '"Not(A:Brand";v="99", "Microsoft Edge";v="133", "Chromium";v="133"', 'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"Windows"'
}
data = '{"handle":"elonmusk","platform":"X"}'
try:
response = requests.post(url, headers=headers, data=data)
if response.status_code == 200:
# Save the raw bytes directly, without decoding
with open(file_path, 'wb') as f:
f.write(response.content)
print(f"File downloaded successfully to {file_path} (raw bytes)")
return True, "File downloaded successfully"
else:
print(f"Download failed, status code: {response.status_code}")
print(f"Response body: {response.text}")
return False, f"Download failed with status code {response.status_code}: {response.text}"
except Exception as e:
print(f"Error while downloading file: {e}")
return False, f"Error downloading file: {str(e)}"
# Step 1: fix the CSV file and add rank_id
def fix_line(lines, line_number, rank_id):
full_line = ''.join(lines)
match = re.search(r'^([^,]+),"(.+?)","([A-Z][a-z]{2} \d{1,2}, \d{1,2}:\d{2}:\d{2} (AM|PM) E[SD]T)"$', full_line,
@ -69,7 +43,6 @@ def fix_line(lines, line_number, rank_id):
print(f"Line {line_number} format error: {repr(full_line)}")
return f'{rank_id},{full_line}'
def process_file(input_file, output_file):
with open(input_file, 'r', encoding='utf-8') as f_in, open(output_file, 'w', encoding='utf-8') as f_out:
f_out.write("rank_id,id,text,created_at\n")
@ -90,104 +63,94 @@ def process_file(input_file, output_file):
if buffer:
fixed_line = fix_line(buffer, line_number, rank_id)
f_out.write(fixed_line + '\n')
print(f"CSV 文件已修复并添加 rank_id保存为 {output_file}")
return True, f"CSV 文件已修复并添加 rank_id保存为 {output_file}"
# 第二步:数据库操作
def get_max_rank_id(cursor):
try:
cursor.execute(f"SELECT MAX(rank_id) FROM {TABLE_NAME}")
result = cursor.fetchone()[0]
return result if result is not None else 0
return result if result is not None else 0, True, ""
except mysql.connector.Error as e:
print(f"获取最大 rank_id 出错: {e}")
return 0
return 0, False, f"Error getting max rank_id: {str(e)}"
def import_to_database(input_file):
def import_to_database(input_file: str) -> tuple[bool, str]:
try:
conn = mysql.connector.connect(**DB_CONFIG)
cursor = conn.cursor()
print("成功连接到数据库")
# Use context managers to ensure resources are closed properly
with mysql.connector.connect(**DB_CONFIG) as conn, conn.cursor() as cursor:
current_year = str(datetime.now().year)
max_rank_id, success, error = get_max_rank_id(cursor)
if not success:
return False, error
# Get the current year
current_year = str(datetime.now().year)
with open(input_file, 'r', encoding='utf-8') as f:
reader = csv.reader(f)
try:
next(reader) # Skip header
except StopIteration:
return False, "File is empty or has no valid header"
# Get the maximum rank_id currently in the database
max_rank_id = get_max_rank_id(cursor)
print(f"Max rank_id in database: {max_rank_id}")
total_rows, inserted = 0, 0
for row in reader:
if len(row) != 4:
continue
try:
rank_id = int(row[0])
tweet_id = float(row[1])
text, created_at = row[2], row[3]
except (ValueError, IndexError) as e:
return False, f"Invalid data format in row: {str(e)}"
with open(input_file, 'r', encoding='utf-8') as f:
reader = csv.reader(f)
next(reader)  # Skip the header row
total_rows = 0
inserted = 0
if rank_id <= max_rank_id:
continue
for row in reader:
if len(row) != 4:
print(f"跳过无效行: {row}")
continue
rank_id, id_, text, created_at = row
rank_id = int(rank_id)
tweet_id = float(id_)
total_rows += 1
insert_query = f"""
INSERT INTO {TABLE_NAME} (rank_id, id, text, year, created_at, timestamp)
VALUES (%s, %s, %s, %s, %s, %s)
"""
cursor.execute(insert_query, (rank_id, tweet_id, text, current_year, created_at, 0))
inserted += 1
# Only import records with rank_id greater than max_rank_id
if rank_id <= max_rank_id:
continue
total_rows += 1
insert_query = f"""
INSERT INTO {TABLE_NAME} (rank_id, id, text, year, created_at, timestamp)
VALUES (%s, %s, %s, %s, %s, %s)
conn.commit()
update_query = f"""
UPDATE {TABLE_NAME}
SET timestamp = UNIX_TIMESTAMP(
CONVERT_TZ(
STR_TO_DATE(
CONCAT(year, ' ', SUBSTRING_INDEX(created_at, ' ', 4)),
'%Y %b %d, %l:%i:%s %p'
),
CASE
WHEN RIGHT(created_at, 3) = 'EDT' THEN 'America/New_York'
WHEN RIGHT(created_at, 3) = 'EST' THEN 'America/New_York'
ELSE 'UTC'
END,
'UTC'
)
) + 8*60*60
WHERE rank_id > {max_rank_id}
"""
cursor.execute(insert_query, (rank_id, tweet_id, text, current_year, created_at, 0))
inserted += 1
print(f"文本: 【{text}】,这是第 {inserted}")
conn.commit()
print(f"数据库导入完成:总计处理 {total_rows} 行,插入 {inserted}")
# 更新新插入记录的 timestamp使用参数化查询
update_query = f"""
UPDATE {TABLE_NAME}
SET timestamp = UNIX_TIMESTAMP(
CONVERT_TZ(
STR_TO_DATE(
CONCAT(year, ' ', SUBSTRING_INDEX(created_at, ' ', 4)),
'%Y %b %d, %l:%i:%s %p'
),
CASE
WHEN RIGHT(created_at, 3) = 'EDT' THEN 'America/New_York'
WHEN RIGHT(created_at, 3) = 'EST' THEN 'America/New_York'
ELSE 'UTC'
END,
'UTC'
)
) + 8*60*60
WHERE rank_id > {max_rank_id}
"""
cursor.execute(update_query)
conn.commit()
print(f"已更新 rank_id > {max_rank_id} 的记录的时间戳")
cursor.execute(update_query)
conn.commit()
return True, f"Database import completed: {inserted} rows inserted"
except mysql.connector.Error as e:
print(f"数据库错误: {e}")
return False, f"Database error: {str(e)}"
except FileNotFoundError as e:
return False, f"File not found: {str(e)}"
except csv.Error as e:
return False, f"CSV parsing error: {str(e)}"
except Exception as e:
print(f"其他错误: {e}")
finally:
if 'cursor' in locals():
cursor.close()
if 'conn' in locals() and conn.is_connected():
conn.close()
print("数据库连接已关闭")
return False, f"Unexpected error: {str(e)}"
# Main flow
def main():
download_file(INPUT_FILE)  # Download the file first
process_file(INPUT_FILE, OUTPUT_FILE)
import_to_database(OUTPUT_FILE)
if __name__ == "__main__":
main()
def process_tweets():
success, msg = download_file(INPUT_FILE)
if not success:
return False, msg
success, msg = process_file(INPUT_FILE, OUTPUT_FILE)
if not success:
return False, msg
success, msg = import_to_database(OUTPUT_FILE)
return success, msg

48
pkg/tool.py Normal file
View File

@ -0,0 +1,48 @@
from datetime import datetime
import pandas as pd
def aggregate_data(data, interval):
all_minutes = pd.DataFrame({'interval_group': range(0, 1440, interval)})
result = []
if data.empty or 'date' not in data.columns:
complete_data = all_minutes.copy()
complete_data['tweet_count'] = 0
complete_data['date'] = datetime.now().date()
return complete_data
for date in data['date'].unique():
day_data = data[data['date'] == date].copy()
day_data['interval_group'] = (day_data['minute_of_day'] // interval) * interval
agg = day_data.groupby('interval_group').size().reset_index(name='tweet_count')
complete_data = all_minutes.merge(agg, on='interval_group', how='left').fillna({'tweet_count': 0})
complete_data['date'] = date
result.append(complete_data)
if not result:
complete_data = all_minutes.copy()
complete_data['tweet_count'] = 0
complete_data['date'] = data['date'].iloc[0] if not data.empty else datetime.now().date()
return complete_data
return pd.concat(result, ignore_index=True)
def generate_xticks(interval):
if interval <= 5:
tick_step = 60
elif interval <= 10:
tick_step = 60
elif interval <= 30:
tick_step = 120
else:
tick_step = 240
ticks = list(range(0, 1440, tick_step))
tick_labels = [f"{m // 60:02d}:{m % 60:02d}" for m in ticks]
return ticks, tick_labels
def minutes_to_time(minutes):
hours = minutes // 60
mins = minutes % 60
return f"{hours:02d}:{mins:02d}"

5
plan.md Normal file
View File

@ -0,0 +1,5 @@
#### Follow-ups
1. Consider a model (AI) that predicts the market curve from tweet counts
2. Consider the effect of elon jets

View File

@ -9,4 +9,5 @@ SQLAlchemy~=2.0.38
matplotlib~=3.10.1
numpy~=2.2.3
scipy~=1.15.2
ipython~=8.32.0
ipython~=8.32.0
Flask~=3.0.3