Mars203020 committed on
Commit
bdbc7ce
·
verified ·
1 Parent(s): 7dd30ff

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +126 -53
app.py CHANGED
@@ -442,69 +442,142 @@ if st.session_state.results:
442
 
443
  if selected_user:
444
  user_df = df[df['user_id'] == selected_user]
445
- user_gini_info = user_metrics_df[user_metrics_df['user_id'] == selected_user].iloc[0]
446
 
447
- # Display the top-level metrics for the user first
448
- c1, c2 = st.columns(2)
449
- with c1: st.metric("Total Posts by User", len(user_df))
450
- with c2: st.metric("Topic Diversity (Gini)", f"{user_gini_info['gini_coefficient']:.3f}", help=interpret_gini(user_gini_info['gini_coefficient']))
451
 
452
- st.markdown("---") # Add a visual separator
453
 
454
- # --- START: New Two-Column Layout for Charts ---
455
- col1, col2 = st.columns(2)
456
 
457
- with col1:
458
- # --- Chart 1: Topic Distribution Pie Chart ---
459
- user_topic_counts = user_df['topic_id'].value_counts().reset_index()
460
- user_topic_counts.columns = ['topic_id', 'count']
461
 
462
- fig_pie = px.pie(
463
- user_topic_counts[user_topic_counts.topic_id != -1],
464
- names='topic_id',
465
- values='count',
466
- title=f"Overall Topic Distribution for {selected_user}",
467
- hole=0.4
468
- )
469
- fig_pie.update_layout(margin=dict(l=0, r=0, t=40, b=0))
470
- st.plotly_chart(fig_pie, use_container_width=True)
471
-
472
- with col2:
473
- # --- Chart 2: Topic Evolution for User ---
474
- if len(user_df) > 1:
475
- user_evo_df = user_df[user_df['topic_id'] != -1].copy()
476
- user_evo_df['timestamp'] = pd.to_datetime(user_evo_df['timestamp'])
477
-
478
- if not user_evo_df.empty and user_evo_df['timestamp'].nunique() > 1:
479
- user_pivot = user_evo_df.pivot_table(index='timestamp', columns='topic_id', aggfunc='size', fill_value=0)
480
 
481
- time_delta = user_pivot.index.max() - user_pivot.index.min()
482
- if time_delta.days > 30: resample_freq = 'D'
483
- elif time_delta.days > 2: resample_freq = 'H'
484
- else: resample_freq = 'T'
485
 
486
- user_resampled = user_pivot.resample(resample_freq).sum()
487
- row_sums = user_resampled.sum(axis=1)
488
- user_proportions = user_resampled.div(row_sums, axis=0).fillna(0)
489
 
490
- topic_name_map = topic_info.set_index('Topic')['Name'].to_dict()
491
- user_proportions.rename(columns=topic_name_map, inplace=True)
492
 
493
- fig_user_evo = px.area(
494
- user_proportions,
495
- x=user_proportions.index,
496
- y=user_proportions.columns,
497
- title=f"Topic Proportion Over Time for {selected_user}",
498
- labels={'value': 'Topic Proportion', 'variable': 'Topic', 'index': 'Time'},
499
- )
500
- fig_user_evo.update_layout(margin=dict(l=0, r=0, t=40, b=0))
501
- st.plotly_chart(fig_user_evo, use_container_width=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
502
  else:
503
- st.info("This user has no posts in meaningful topics or all posts occurred at the same time.")
504
- else:
505
- st.info("Topic evolution requires more than one post to display.")
506
-
507
-
 
 
 
508
  st.markdown("#### User's Most Recent Posts")
509
  user_posts_table = user_df[['post_content', 'timestamp', 'topic_id']] \
510
  .sort_values(by='timestamp', ascending=False) \
 
442
 
443
  if selected_user:
444
  user_df = df[df['user_id'] == selected_user]
445
+ # user_gini_info = user_metrics_df[user_metrics_df['user_id'] == selected_user].iloc[0]
446
 
447
+ # # Display the top-level metrics for the user first
448
+ # c1, c2 = st.columns(2)
449
+ # with c1: st.metric("Total Posts by User", len(user_df))
450
+ # with c2: st.metric("Topic Diversity (Gini)", f"{user_gini_info['gini_coefficient']:.3f}", help=interpret_gini(user_gini_info['gini_coefficient']))
451
 
452
+ # st.markdown("---") # Add a visual separator
453
 
454
+ # # --- START: New Two-Column Layout for Charts ---
455
+ # col1, col2 = st.columns(2)
456
 
457
+ # with col1:
458
+ # # --- Chart 1: Topic Distribution Pie Chart ---
459
+ # user_topic_counts = user_df['topic_id'].value_counts().reset_index()
460
+ # user_topic_counts.columns = ['topic_id', 'count']
461
 
462
+ # fig_pie = px.pie(
463
+ # user_topic_counts[user_topic_counts.topic_id != -1],
464
+ # names='topic_id',
465
+ # values='count',
466
+ # title=f"Overall Topic Distribution for {selected_user}",
467
+ # hole=0.4
468
+ # )
469
+ # fig_pie.update_layout(margin=dict(l=0, r=0, t=40, b=0))
470
+ # st.plotly_chart(fig_pie, use_container_width=True)
471
+
472
+ # with col2:
473
+ # # --- Chart 2: Topic Evolution for User ---
474
+ # if len(user_df) > 1:
475
+ # user_evo_df = user_df[user_df['topic_id'] != -1].copy()
476
+ # user_evo_df['timestamp'] = pd.to_datetime(user_evo_df['timestamp'])
477
+
478
+ # if not user_evo_df.empty and user_evo_df['timestamp'].nunique() > 1:
479
+ # user_pivot = user_evo_df.pivot_table(index='timestamp', columns='topic_id', aggfunc='size', fill_value=0)
480
 
481
+ # time_delta = user_pivot.index.max() - user_pivot.index.min()
482
+ # if time_delta.days > 30: resample_freq = 'D'
483
+ # elif time_delta.days > 2: resample_freq = 'H'
484
+ # else: resample_freq = 'T'
485
 
486
+ # user_resampled = user_pivot.resample(resample_freq).sum()
487
+ # row_sums = user_resampled.sum(axis=1)
488
+ # user_proportions = user_resampled.div(row_sums, axis=0).fillna(0)
489
 
490
+ # topic_name_map = topic_info.set_index('Topic')['Name'].to_dict()
491
+ # user_proportions.rename(columns=topic_name_map, inplace=True)
492
 
493
+ # fig_user_evo = px.area(
494
+ # user_proportions,
495
+ # x=user_proportions.index,
496
+ # y=user_proportions.columns,
497
+ # title=f"Topic Proportion Over Time for {selected_user}",
498
+ # labels={'value': 'Topic Proportion', 'variable': 'Topic', 'index': 'Time'},
499
+ # )
500
+ # fig_user_evo.update_layout(margin=dict(l=0, r=0, t=40, b=0))
501
+ # st.plotly_chart(fig_user_evo, use_container_width=True)
502
+ # else:
503
+ # st.info("This user has no posts in meaningful topics or all posts occurred at the same time.")
504
+ # else:
505
+ # st.info("Topic evolution requires more than one post to display.")
506
+
507
+ # Check if this user exists in the metrics dataframe (i.e., has meaningful posts)
508
+ user_metrics_filtered = user_metrics_df[user_metrics_df['user_id'] == selected_user]
509
+
510
+ # --- START OF THE FIX ---
511
+ if not user_metrics_filtered.empty:
512
+ # If the user has meaningful posts, display all metrics and charts
513
+ user_gini_info = user_metrics_filtered.iloc[0]
514
+
515
+ # Display the top-level metrics for the user first
516
+ c1, c2 = st.columns(2)
517
+ with c1: st.metric("Total Posts by User", len(user_df))
518
+ with c2: st.metric("Topic Diversity (Gini)", f"{user_gini_info['gini_coefficient']:.3f}", help=interpret_gini(user_gini_info['gini_coefficient']))
519
+
520
+ st.markdown("---") # Add a visual separator
521
+
522
+ # Two-Column Layout for Charts
523
+ col1, col2 = st.columns(2)
524
+
525
+ with col1:
526
+ # Chart 1: Topic Distribution Pie Chart
527
+ user_topic_counts = user_df['topic_id'].value_counts().reset_index()
528
+ user_topic_counts.columns = ['topic_id', 'count']
529
+
530
+ fig_pie = px.pie(
531
+ user_topic_counts[user_topic_counts.topic_id != -1],
532
+ names='topic_id',
533
+ values='count',
534
+ title=f"Overall Topic Distribution for {selected_user}",
535
+ hole=0.4
536
+ )
537
+ fig_pie.update_layout(margin=dict(l=0, r=0, t=40, b=0))
538
+ st.plotly_chart(fig_pie, use_container_width=True)
539
+
540
+ with col2:
541
+ # Chart 2: Topic Evolution for User
542
+ if len(user_df) > 1:
543
+ user_evo_df = user_df[user_df['topic_id'] != -1].copy()
544
+ user_evo_df['timestamp'] = pd.to_datetime(user_evo_df['timestamp'])
545
+
546
+ if not user_evo_df.empty and user_evo_df['timestamp'].nunique() > 1:
547
+ user_pivot = user_evo_df.pivot_table(index='timestamp', columns='topic_id', aggfunc='size', fill_value=0)
548
+
549
+ time_delta = user_pivot.index.max() - user_pivot.index.min()
550
+ if time_delta.days > 30: resample_freq = 'D'
551
+ elif time_delta.days > 2: resample_freq = 'H'
552
+ else: resample_freq = 'T'
553
+
554
+ user_resampled = user_pivot.resample(resample_freq).sum()
555
+ row_sums = user_resampled.sum(axis=1)
556
+ user_proportions = user_resampled.div(row_sums, axis=0).fillna(0)
557
+
558
+ topic_name_map = topic_info.set_index('Topic')['Name'].to_dict()
559
+ user_proportions.rename(columns=topic_name_map, inplace=True)
560
+
561
+ fig_user_evo = px.area(
562
+ user_proportions,
563
+ x=user_proportions.index,
564
+ y=user_proportions.columns,
565
+ title=f"Topic Proportion Over Time for {selected_user}",
566
+ labels={'value': 'Topic Proportion', 'variable': 'Topic', 'index': 'Time'},
567
+ )
568
+ fig_user_evo.update_layout(margin=dict(l=0, r=0, t=40, b=0))
569
+ st.plotly_chart(fig_user_evo, use_container_width=True)
570
+ else:
571
+ st.info("This user has no posts in meaningful topics or all posts occurred at the same time.")
572
  else:
573
+ st.info("Topic evolution requires more than one post to display.")
574
+ else:
575
+ # If the user has NO meaningful posts, show a simplified view
576
+ st.metric("Total Posts by User", len(user_df))
577
+ st.warning(f"**{selected_user}** has no posts in any of the identified topics. Their posts may be outliers.")
578
+ # --- END OF THE FIX ---
579
+
580
+
581
  st.markdown("#### User's Most Recent Posts")
582
  user_posts_table = user_df[['post_content', 'timestamp', 'topic_id']] \
583
  .sort_values(by='timestamp', ascending=False) \