Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -442,69 +442,142 @@ if st.session_state.results:
|
|
| 442 |
|
| 443 |
if selected_user:
|
| 444 |
user_df = df[df['user_id'] == selected_user]
|
| 445 |
-
user_gini_info = user_metrics_df[user_metrics_df['user_id'] == selected_user].iloc[0]
|
| 446 |
|
| 447 |
-
# Display the top-level metrics for the user first
|
| 448 |
-
c1, c2 = st.columns(2)
|
| 449 |
-
with c1: st.metric("Total Posts by User", len(user_df))
|
| 450 |
-
with c2: st.metric("Topic Diversity (Gini)", f"{user_gini_info['gini_coefficient']:.3f}", help=interpret_gini(user_gini_info['gini_coefficient']))
|
| 451 |
|
| 452 |
-
st.markdown("---") # Add a visual separator
|
| 453 |
|
| 454 |
-
# --- START: New Two-Column Layout for Charts ---
|
| 455 |
-
col1, col2 = st.columns(2)
|
| 456 |
|
| 457 |
-
with col1:
|
| 458 |
-
|
| 459 |
-
|
| 460 |
-
|
| 461 |
|
| 462 |
-
|
| 463 |
-
|
| 464 |
-
|
| 465 |
-
|
| 466 |
-
|
| 467 |
-
|
| 468 |
-
|
| 469 |
-
|
| 470 |
-
|
| 471 |
-
|
| 472 |
-
with col2:
|
| 473 |
-
|
| 474 |
-
|
| 475 |
-
|
| 476 |
-
|
| 477 |
-
|
| 478 |
-
|
| 479 |
-
|
| 480 |
|
| 481 |
-
|
| 482 |
-
|
| 483 |
-
|
| 484 |
-
|
| 485 |
|
| 486 |
-
|
| 487 |
-
|
| 488 |
-
|
| 489 |
|
| 490 |
-
|
| 491 |
-
|
| 492 |
|
| 493 |
-
|
| 494 |
-
|
| 495 |
-
|
| 496 |
-
|
| 497 |
-
|
| 498 |
-
|
| 499 |
-
|
| 500 |
-
|
| 501 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 502 |
else:
|
| 503 |
-
st.info("This user has no posts in meaningful topics or all posts occurred at the same time.")
|
| 504 |
-
|
| 505 |
-
|
| 506 |
-
|
| 507 |
-
|
|
|
|
|
|
|
|
|
|
| 508 |
st.markdown("#### User's Most Recent Posts")
|
| 509 |
user_posts_table = user_df[['post_content', 'timestamp', 'topic_id']] \
|
| 510 |
.sort_values(by='timestamp', ascending=False) \
|
|
|
|
| 442 |
|
| 443 |
if selected_user:
|
| 444 |
user_df = df[df['user_id'] == selected_user]
|
| 445 |
+
# user_gini_info = user_metrics_df[user_metrics_df['user_id'] == selected_user].iloc[0]
|
| 446 |
|
| 447 |
+
# # Display the top-level metrics for the user first
|
| 448 |
+
# c1, c2 = st.columns(2)
|
| 449 |
+
# with c1: st.metric("Total Posts by User", len(user_df))
|
| 450 |
+
# with c2: st.metric("Topic Diversity (Gini)", f"{user_gini_info['gini_coefficient']:.3f}", help=interpret_gini(user_gini_info['gini_coefficient']))
|
| 451 |
|
| 452 |
+
# st.markdown("---") # Add a visual separator
|
| 453 |
|
| 454 |
+
# # --- START: New Two-Column Layout for Charts ---
|
| 455 |
+
# col1, col2 = st.columns(2)
|
| 456 |
|
| 457 |
+
# with col1:
|
| 458 |
+
# # --- Chart 1: Topic Distribution Pie Chart ---
|
| 459 |
+
# user_topic_counts = user_df['topic_id'].value_counts().reset_index()
|
| 460 |
+
# user_topic_counts.columns = ['topic_id', 'count']
|
| 461 |
|
| 462 |
+
# fig_pie = px.pie(
|
| 463 |
+
# user_topic_counts[user_topic_counts.topic_id != -1],
|
| 464 |
+
# names='topic_id',
|
| 465 |
+
# values='count',
|
| 466 |
+
# title=f"Overall Topic Distribution for {selected_user}",
|
| 467 |
+
# hole=0.4
|
| 468 |
+
# )
|
| 469 |
+
# fig_pie.update_layout(margin=dict(l=0, r=0, t=40, b=0))
|
| 470 |
+
# st.plotly_chart(fig_pie, use_container_width=True)
|
| 471 |
+
|
| 472 |
+
# with col2:
|
| 473 |
+
# # --- Chart 2: Topic Evolution for User ---
|
| 474 |
+
# if len(user_df) > 1:
|
| 475 |
+
# user_evo_df = user_df[user_df['topic_id'] != -1].copy()
|
| 476 |
+
# user_evo_df['timestamp'] = pd.to_datetime(user_evo_df['timestamp'])
|
| 477 |
+
|
| 478 |
+
# if not user_evo_df.empty and user_evo_df['timestamp'].nunique() > 1:
|
| 479 |
+
# user_pivot = user_evo_df.pivot_table(index='timestamp', columns='topic_id', aggfunc='size', fill_value=0)
|
| 480 |
|
| 481 |
+
# time_delta = user_pivot.index.max() - user_pivot.index.min()
|
| 482 |
+
# if time_delta.days > 30: resample_freq = 'D'
|
| 483 |
+
# elif time_delta.days > 2: resample_freq = 'H'
|
| 484 |
+
# else: resample_freq = 'T'
|
| 485 |
|
| 486 |
+
# user_resampled = user_pivot.resample(resample_freq).sum()
|
| 487 |
+
# row_sums = user_resampled.sum(axis=1)
|
| 488 |
+
# user_proportions = user_resampled.div(row_sums, axis=0).fillna(0)
|
| 489 |
|
| 490 |
+
# topic_name_map = topic_info.set_index('Topic')['Name'].to_dict()
|
| 491 |
+
# user_proportions.rename(columns=topic_name_map, inplace=True)
|
| 492 |
|
| 493 |
+
# fig_user_evo = px.area(
|
| 494 |
+
# user_proportions,
|
| 495 |
+
# x=user_proportions.index,
|
| 496 |
+
# y=user_proportions.columns,
|
| 497 |
+
# title=f"Topic Proportion Over Time for {selected_user}",
|
| 498 |
+
# labels={'value': 'Topic Proportion', 'variable': 'Topic', 'index': 'Time'},
|
| 499 |
+
# )
|
| 500 |
+
# fig_user_evo.update_layout(margin=dict(l=0, r=0, t=40, b=0))
|
| 501 |
+
# st.plotly_chart(fig_user_evo, use_container_width=True)
|
| 502 |
+
# else:
|
| 503 |
+
# st.info("This user has no posts in meaningful topics or all posts occurred at the same time.")
|
| 504 |
+
# else:
|
| 505 |
+
# st.info("Topic evolution requires more than one post to display.")
|
| 506 |
+
|
| 507 |
+
# Check if this user exists in the metrics dataframe (i.e., has meaningful posts)
|
| 508 |
+
user_metrics_filtered = user_metrics_df[user_metrics_df['user_id'] == selected_user]
|
| 509 |
+
|
| 510 |
+
# --- START OF THE FIX ---
|
| 511 |
+
if not user_metrics_filtered.empty:
|
| 512 |
+
# If the user has meaningful posts, display all metrics and charts
|
| 513 |
+
user_gini_info = user_metrics_filtered.iloc[0]
|
| 514 |
+
|
| 515 |
+
# Display the top-level metrics for the user first
|
| 516 |
+
c1, c2 = st.columns(2)
|
| 517 |
+
with c1: st.metric("Total Posts by User", len(user_df))
|
| 518 |
+
with c2: st.metric("Topic Diversity (Gini)", f"{user_gini_info['gini_coefficient']:.3f}", help=interpret_gini(user_gini_info['gini_coefficient']))
|
| 519 |
+
|
| 520 |
+
st.markdown("---") # Add a visual separator
|
| 521 |
+
|
| 522 |
+
# Two-Column Layout for Charts
|
| 523 |
+
col1, col2 = st.columns(2)
|
| 524 |
+
|
| 525 |
+
with col1:
|
| 526 |
+
# Chart 1: Topic Distribution Pie Chart
|
| 527 |
+
user_topic_counts = user_df['topic_id'].value_counts().reset_index()
|
| 528 |
+
user_topic_counts.columns = ['topic_id', 'count']
|
| 529 |
+
|
| 530 |
+
fig_pie = px.pie(
|
| 531 |
+
user_topic_counts[user_topic_counts.topic_id != -1],
|
| 532 |
+
names='topic_id',
|
| 533 |
+
values='count',
|
| 534 |
+
title=f"Overall Topic Distribution for {selected_user}",
|
| 535 |
+
hole=0.4
|
| 536 |
+
)
|
| 537 |
+
fig_pie.update_layout(margin=dict(l=0, r=0, t=40, b=0))
|
| 538 |
+
st.plotly_chart(fig_pie, use_container_width=True)
|
| 539 |
+
|
| 540 |
+
with col2:
|
| 541 |
+
# Chart 2: Topic Evolution for User
|
| 542 |
+
if len(user_df) > 1:
|
| 543 |
+
user_evo_df = user_df[user_df['topic_id'] != -1].copy()
|
| 544 |
+
user_evo_df['timestamp'] = pd.to_datetime(user_evo_df['timestamp'])
|
| 545 |
+
|
| 546 |
+
if not user_evo_df.empty and user_evo_df['timestamp'].nunique() > 1:
|
| 547 |
+
user_pivot = user_evo_df.pivot_table(index='timestamp', columns='topic_id', aggfunc='size', fill_value=0)
|
| 548 |
+
|
| 549 |
+
time_delta = user_pivot.index.max() - user_pivot.index.min()
|
| 550 |
+
if time_delta.days > 30: resample_freq = 'D'
|
| 551 |
+
elif time_delta.days > 2: resample_freq = 'H'
|
| 552 |
+
else: resample_freq = 'T'
|
| 553 |
+
|
| 554 |
+
user_resampled = user_pivot.resample(resample_freq).sum()
|
| 555 |
+
row_sums = user_resampled.sum(axis=1)
|
| 556 |
+
user_proportions = user_resampled.div(row_sums, axis=0).fillna(0)
|
| 557 |
+
|
| 558 |
+
topic_name_map = topic_info.set_index('Topic')['Name'].to_dict()
|
| 559 |
+
user_proportions.rename(columns=topic_name_map, inplace=True)
|
| 560 |
+
|
| 561 |
+
fig_user_evo = px.area(
|
| 562 |
+
user_proportions,
|
| 563 |
+
x=user_proportions.index,
|
| 564 |
+
y=user_proportions.columns,
|
| 565 |
+
title=f"Topic Proportion Over Time for {selected_user}",
|
| 566 |
+
labels={'value': 'Topic Proportion', 'variable': 'Topic', 'index': 'Time'},
|
| 567 |
+
)
|
| 568 |
+
fig_user_evo.update_layout(margin=dict(l=0, r=0, t=40, b=0))
|
| 569 |
+
st.plotly_chart(fig_user_evo, use_container_width=True)
|
| 570 |
+
else:
|
| 571 |
+
st.info("This user has no posts in meaningful topics or all posts occurred at the same time.")
|
| 572 |
else:
|
| 573 |
+
st.info("Topic evolution requires more than one post to display.")
|
| 574 |
+
else:
|
| 575 |
+
# If the user has NO meaningful posts, show a simplified view
|
| 576 |
+
st.metric("Total Posts by User", len(user_df))
|
| 577 |
+
st.warning(f"**{selected_user}** has no posts in any of the identified topics. Their posts may be outliers.")
|
| 578 |
+
# --- END OF THE FIX ---
|
| 579 |
+
|
| 580 |
+
|
| 581 |
st.markdown("#### User's Most Recent Posts")
|
| 582 |
user_posts_table = user_df[['post_content', 'timestamp', 'topic_id']] \
|
| 583 |
.sort_values(by='timestamp', ascending=False) \
|