-
Notifications
You must be signed in to change notification settings - Fork 0
/
streamlit_app.py
138 lines (123 loc) · 4.7 KB
/
streamlit_app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
import logging
import logging.config
import os
import time
import plotly.express as px
import streamlit as st
from dotenv import load_dotenv
from trend_tracker.utils import load_config
from trend_tracker.viz_cluster import DataVizMongoDB, make_wordCloud
# Logging: obtain the "streamlit" logger, then apply the configuration
# declared in logging.ini.
log = logging.getLogger("streamlit")
logging.config.fileConfig("logging.ini")

# Project settings are read from config.yml. load_dotenv() runs before the
# environment lookup below, so the connection string can presumably be
# supplied through a .env file -- confirm against the deployment setup.
config = load_config("config.yml")
load_dotenv()

# Data source: build the MongoDB-backed visualisation helper, open the
# connection and do a first data refresh before anything is rendered.
mongo_uri = os.environ["MONGODB_CONNECTION_STRING"]  # KeyError if unset
database_name = config["database_name"]
dataviz = DataVizMongoDB(mongo_uri, database_name, log)
dataviz.connect()
dataviz.update_data()

# Page-level settings; this is the first Streamlit call in this script.
page_settings = {
    "page_title": "Trend-Tracker dashboard",
    "page_icon": "✅",
    "layout": "wide",
}
st.set_page_config(**page_settings)
def _format_refresh_rate(seconds):
    """Return the label shown for a refresh-rate option, e.g. ``5s``."""
    return f"{seconds}s"


# Dashboard title.
st.title("Trend-Tracker / Live Twitter & Reddit dashboard")

# Sidebar controls. Calling the widget methods on st.sidebar places them in
# the sidebar, the same as creating them inside a `with st.sidebar:` block.
sidebar = st.sidebar
cluster_key = sidebar.selectbox("Cluster selection", dataviz.cluster_keys)
auto_refresh = sidebar.checkbox("Auto refresh", value=True)
refresh_time = sidebar.radio(
    "Refresh rate",
    [2, 5, 10, 30],
    index=1,  # pre-select the second option (5)
    format_func=_format_refresh_rate,
)

# Single slot whose content is replaced on every pass of the render loop.
placeholder = st.empty()
# Column of df_data holding the cluster ids of the selected clustering.
# cluster_key can be falsy (the export call already guarded against that), so
# the "cluster" fallback is resolved once here and reused for every lookup
# below instead of only for the export call.
active_key = cluster_key or "cluster"

# Number of top clusters shown in the word-cloud row and in the detail row.
top_n = 3

# Render loop: refresh the data (optionally), redraw the whole dashboard in
# the placeholder slot, then sleep until the next pass.
while True:
    if auto_refresh:
        dataviz.update_data()

    if dataviz.is_memory_empty():
        with placeholder.container():
            st.warning("Database is empty.", icon="⚠️")
    else:
        (
            df_data,
            top_loc,
            pop_cluster,
            df_count,
            most_freq_clusters,
            most_freq_hashs,
            valid,
        ) = dataviz.export_viz_data(cluster_key=active_key)

        # NOTE(review): the KPIs always read the "cluster" column, whatever
        # clustering is selected in the sidebar -- confirm this is intended.
        nb_cluster = df_data.cluster.nunique()
        nb_data = df_count["total"].sum()
        nb_data_in_cluster = df_data.cluster.notna().sum()

        # Both rows below index the first `top_n` cluster ids, so make sure
        # they exist before touching them (the word-cloud row used to check
        # only the hashtag list, and the detail row required exactly 3).
        if len(most_freq_clusters) >= top_n:
            top_clusters = [most_freq_clusters[rank] for rank in range(top_n)]
        else:
            top_clusters = []

        with placeholder.container():
            # KPI row: three metrics plus a wider column for the locations.
            kpi_1, kpi_2, kpi_3, fig_loc = st.columns([1, 1, 1, 2])
            kpi_1.metric(label="Nb Cluster", value=nb_cluster)
            kpi_2.metric(label="Nb Data", value=nb_data)
            kpi_3.metric(label="Nb Data in clusters", value=nb_data_in_cluster)
            with fig_loc:
                st.markdown("Evolution of the top locations (tweet only)")
                st.table(top_loc)

            st.markdown("Evolution of the population inside the clusters")
            st.write(px.bar(pop_cluster, x=active_key, y="counts"))

            st.markdown("Evolution of the data sources")
            # .loc[1:] drops everything before index label 1.
            st.write(
                px.line(df_count.loc[1:], x="datetime", y=["reddit", "twitter"])
            )

            # Word clouds, one per top cluster. all(valid) replaces
            # `min(valid) is True`, which is never true for truthy values
            # that are not the `True` singleton (e.g. NumPy booleans) and
            # raises on an empty sequence.
            if top_clusters and len(most_freq_hashs) >= top_n and all(valid):
                for column, id_cluster in zip(st.columns(top_n), top_clusters):
                    with column:
                        # Up to 100 rows of the cluster in random order, so
                        # the cloud can differ between refreshes. The ids
                        # are matched on the selected clustering column,
                        # like the detail view below.
                        cluster_text = (
                            df_data[df_data[active_key] == id_cluster]["text"]
                            .sample(frac=1)
                            .head(100)
                            .sum()
                        )
                        make_wordCloud(cluster_text, id_cluster)

            # Raw rows (first 100) of each top cluster.
            if top_clusters:
                st.markdown("### Detailed Data view")
                for column, id_cluster in zip(st.columns(top_n), top_clusters):
                    with column:
                        st.markdown(f"Cluster {id_cluster}")
                        df_zoom = df_data[df_data[active_key] == id_cluster]
                        st.dataframe(df_zoom.head(100))

    # Sidebar refresh rate, falling back to the configured default when the
    # widget value is falsy.
    time.sleep(refresh_time or config["refresh_time"])