|
import streamlit as st |
|
import pandas as pd |
|
import re |
|
import plotly.express as px |
|
|
|
|
|
from src.predict import predict_sentiments |
|
from src.youtube import get_video_comments |
|
|
|
|
|
def extract_video_id(url_or_id: str):
    """
    Extract the 11-character YouTube video ID from a URL or a bare ID.

    Recognizes the common URL shapes (``watch?v=``, ``youtu.be/``,
    ``embed/``, ``shorts/``, ``live/``) and also accepts the raw
    11-character ID itself.

    Args:
        url_or_id: A YouTube URL or a bare video ID. May be empty/None.

    Returns:
        The 11-character video ID string, or None if no valid ID is found.
    """
    if not url_or_id:
        return None

    # YouTube video IDs are exactly 11 characters drawn from [A-Za-z0-9_-].
    patterns = [
        r"watch\?v=([a-zA-Z0-9_-]{11})",
        r"youtu\.be/([a-zA-Z0-9_-]{11})",
        r"embed/([a-zA-Z0-9_-]{11})",
        r"shorts/([a-zA-Z0-9_-]{11})",
        r"live/([a-zA-Z0-9_-]{11})",  # live-stream URLs use /live/<id>
    ]

    for pattern in patterns:
        match = re.search(pattern, url_or_id)
        if match:
            return match.group(1)

    # Bare-ID fallback: must be exactly 11 valid ID characters.
    # fullmatch fixes the old length-plus-punctuation check, which wrongly
    # accepted any 11-char string without '/', '?', '=' or '.'
    # (e.g. "hello world").
    if re.fullmatch(r"[a-zA-Z0-9_-]{11}", url_or_id):
        return url_or_id

    return None
|
|
|
|
|
def analyze_youtube_video(video_url_or_id: str):
    """
    Run the full YouTube comment analysis pipeline for one video.

    Steps: resolve the video ID, fetch the comments and title, predict a
    sentiment for each comment, and aggregate counts and percentages.

    Args:
        video_url_or_id: A YouTube URL or bare 11-character video ID.

    Returns:
        A dict ``{"summary": <dict>, "comments_data": <list of dicts>}`` on
        success, or None if the input was invalid or an error occurred.
        User-facing errors/warnings are reported via st.error/st.warning.
    """
    video_id = extract_video_id(video_url_or_id)
    if not video_id:
        st.error(
            "Oops! That doesn't look like a valid YouTube URL or Video ID. Please check and try again. Example: Z9kGRMglw-I or youtu.be/3?v=Z9kGRMglw-I"
        )
        return None

    summary_data = {}

    try:
        with st.spinner(f"Fetching comments & title for video ID: {video_id}..."):
            video_data = get_video_comments(video_id)
            comments_text_list = video_data.get("comments", [])
            video_title = video_data.get("title", "Video Title Not Found")
            print(
                f"DEBUG (streamlit_app.py): Received title from youtube.py: '{video_title}'"
            )

        if not comments_text_list:
            st.warning(
                "Hmm, no comments found for this video. Are comments enabled? Or is it a very new video?"
            )
            # All-zero summary. Fix: include the fetched video title so the
            # caller can still show which video was analyzed (previously
            # omitted here, making the UI fall back to a placeholder title).
            summary_data = {
                "video_title": video_title,
                "num_comments_fetched": 0,
                "num_comments_analyzed": 0,
                "positive": 0,
                "neutral": 0,
                "negative": 0,
                "positive_percentage": 0,
                "neutral_percentage": 0,
                "negative_percentage": 0,
                "num_valid_predictions": 0,
            }
            return {"summary": summary_data, "comments_data": []}

        st.info(
            f"Great! Found {len(comments_text_list)} comments. Now thinking about their feelings (sentiments)..."
        )

        with st.spinner("Analyzing sentiments with the model... Please wait."):
            prediction_results = predict_sentiments(comments_text_list)

        # Tally predictions; any label other than the three known classes
        # counts as an error.
        counts = {"positive": 0, "negative": 0, "neutral": 0}
        error_count = 0
        for result in prediction_results:
            label = result.get("label")
            if label in counts:
                counts[label] += 1
            else:
                error_count += 1

        num_valid_predictions = sum(counts.values())
        total_comments_processed = len(prediction_results)
        if error_count > 0:
            st.warning(
                f"Could not predict sentiment properly for {error_count} comments."
            )

        def _pct(count: int) -> float:
            # Percentage of valid predictions; 0 when nothing was valid.
            return (
                (count / num_valid_predictions) * 100
                if num_valid_predictions > 0
                else 0
            )

        summary_data = {
            "video_title": video_title,
            "num_comments_fetched": len(comments_text_list),
            "num_comments_analyzed": total_comments_processed,
            "num_valid_predictions": num_valid_predictions,
            "positive": counts["positive"],
            "negative": counts["negative"],
            "neutral": counts["neutral"],
            "positive_percentage": _pct(counts["positive"]),
            "neutral_percentage": _pct(counts["neutral"]),
            "negative_percentage": _pct(counts["negative"]),
        }

        # Pair each comment with its prediction. zip also guards against a
        # length mismatch (the previous indexed loop would raise IndexError
        # if the model returned fewer results than comments).
        comments_data_for_df = []
        for comment_text, result in zip(comments_text_list, prediction_results):
            scores = result.get("scores", {})
            comments_data_for_df.append(
                {
                    "Comment Text": comment_text,
                    "Predicted Sentiment": result.get("label", "Error"),
                    # Confidence = probability of the winning class.
                    "Confidence": max(scores.values()) if scores else 0.0,
                }
            )

        return {"summary": summary_data, "comments_data": comments_data_for_df}

    except Exception as e:
        # Surface the failure in the UI and dump a traceback to stdout for
        # server-side debugging.
        st.error(f"Uh oh! An error popped up during analysis: {str(e)}")
        print(f"Full error in analyze_youtube_video: {e}")
        import traceback

        traceback.print_exc()
        return None
|
|
|
|
|
|
|
|
|
|
|
# --- Page-level configuration and static intro UI ---

st.set_page_config(page_title="Social Sentiment Analysis", layout="centered")

# Light-grey app background via injected CSS (Streamlit exposes no direct
# background-color setting).
st.markdown(
    """
    <style>
    .stApp {
        background-color: #d6d6d6;
    }
    </style>
    """,
    unsafe_allow_html=True,
)

st.title("π SOCIAL SENTIMENT ANALYSIS")

# Intro / usage text shown above the tabs.
st.write(
    """
    Welcome to the **Social Sentiment Analyzer!** π

    This application uses a fine-tuned RoBERTa model to predict the sentiment (Positive, Neutral, or Negative) expressed in text.

    Use the tabs below to choose your input method:
    * **Analyze Text Input:** Paste or type any English text directly.
    * **YouTube Analysis:** Enter a YouTube video URL or ID to analyze its comments.
    * **Twitter/X Analysis:** Support for analyzing Twitter/X posts is coming soon!

    Select a tab to begin!
    """
)

# Three tabs: direct text input, YouTube comments, and a Twitter placeholder.
tab_text_input, tab_youtube, tab_twitter = st.tabs(
    ["Analyze Text Input", "YouTube Analysis", "Twitter/X Analysis (Coming Soon!)"]
)
|
|
|
with tab_text_input:
    # --- Tab 1: sentiment analysis of free text typed by the user ---

    st.header("Analyze Sentiment of Your Text")
    st.write(
        "Enter a sentence or a short paragraph below to see its predicted sentiment distribution."
    )

    user_text = st.text_area(
        "Enter text here:",
        key="text_input_area_key",
        height=100,
        placeholder="Type or paste your text...",
    )

    if st.button("Analyze Text", key="text_input_analyze_btn"):

        # Reject empty or whitespace-only input before running the model.
        if user_text and not user_text.isspace():

            with st.spinner("Analyzing your text..."):
                try:
                    # predict_sentiments takes a list; we analyze one text.
                    prediction_results = predict_sentiments([user_text])

                    if (
                        prediction_results
                        and isinstance(prediction_results, list)
                        and len(prediction_results) > 0
                    ):

                        # Only one input was sent, so only the first result matters.
                        result = prediction_results[0]
                        predicted_label = result.get("label")
                        scores = result.get(
                            "scores"
                        )

                        if (
                            predicted_label
                            and scores
                            and isinstance(scores, dict)
                            and predicted_label != "Error"
                        ):

                            st.subheader("Predicted Sentiment:")

                            # Color-code the headline verdict by sentiment class.
                            if predicted_label == "positive":
                                st.success(
                                    f"The model thinks the sentiment is: **{predicted_label.capitalize()}** π"
                                )
                            elif predicted_label == "negative":
                                st.error(
                                    f"The model thinks the sentiment is: **{predicted_label.capitalize()}** π"
                                )
                            else:
                                st.info(
                                    f"The model thinks the sentiment is: **{predicted_label.capitalize()}** π"
                                )

                            st.write("---")
                            st.subheader(
                                "Detailed Probabilities:"
                            )
                            # NOTE(review): this inner check is redundant — the
                            # enclosing `if` already guarantees `scores` is a
                            # non-empty dict, so the else branch is unreachable.
                            if scores and isinstance(scores, dict):

                                # One column per class: negative / neutral / positive.
                                prob_col_neg, prob_col_neu, prob_col_pos = st.columns(3)

                                # Safe lookup: 0.0 when a class key is missing.
                                def get_score(sentiment_name):
                                    return scores.get(
                                        sentiment_name.lower(), 0.0
                                    )

                                # Shared inline-CSS styling for the three values.
                                value_font_size = "22px"
                                value_font_weight = "bold"

                                with prob_col_neg:
                                    neg_prob = get_score("negative")
                                    st.markdown("**Negative π:**")
                                    st.markdown(
                                        f"<p style='font-size: {value_font_size}; font-weight: {value_font_weight}; color:red;'>{neg_prob:.1%}</p>",
                                        unsafe_allow_html=True,
                                    )

                                with prob_col_neu:
                                    neu_prob = get_score("neutral")
                                    st.markdown("**Neutral π:**")
                                    st.markdown(
                                        f"<p style='font-size: {value_font_size}; font-weight: {value_font_weight}; color:grey;'>{neu_prob:.1%}</p>",
                                        unsafe_allow_html=True,
                                    )

                                with prob_col_pos:
                                    pos_prob = get_score("positive")
                                    st.markdown("**Positive π:**")
                                    st.markdown(
                                        f"<p style='font-size: {value_font_size}; font-weight: {value_font_weight}; color:green;'>{pos_prob:.1%}</p>",
                                        unsafe_allow_html=True,
                                    )

                            else:
                                st.write("Could not retrieve probability scores.")
                                st.write("---")

                            st.subheader("Sentiment Probabilities:")

                            # Build (sentiment, probability) rows for the pie chart.
                            score_items = list(scores.items())
                            if score_items:
                                df_scores = pd.DataFrame(
                                    score_items,
                                    columns=["Sentiment", "Probability"],
                                )

                                df_scores["Probability"] = pd.to_numeric(
                                    df_scores["Probability"]
                                )

                                # Fixed class colors so charts stay consistent.
                                color_map = {
                                    "positive": "green",
                                    "neutral": "grey",
                                    "negative": "red",
                                }

                                # Capitalized labels for display; a lowercase copy
                                # joins against the lowercase color-map keys.
                                df_scores["Sentiment"] = df_scores[
                                    "Sentiment"
                                ].str.capitalize()
                                df_scores["Sentiment_Lower"] = df_scores[
                                    "Sentiment"
                                ].str.lower()
                                color_map_lower = {
                                    k.lower(): v for k, v in color_map.items()
                                }

                                fig_pie_text = px.pie(
                                    df_scores,
                                    values="Probability",
                                    names="Sentiment",
                                    title="Probability Distribution per Class",
                                    color="Sentiment_Lower",
                                    color_discrete_map=color_map_lower,
                                )

                                fig_pie_text.update_traces(
                                    textposition="inside",
                                    textinfo="percent+label",
                                    hovertemplate="Sentiment: %{label}<br>Probability: %{percent}",
                                )

                                fig_pie_text.update_layout(
                                    uniformtext_minsize=16,
                                    uniformtext_mode="hide",
                                )

                                st.plotly_chart(fig_pie_text, use_container_width=True)

                            else:
                                st.warning("Received empty scores, cannot plot chart.")

                        else:
                            st.error(
                                f"Sentiment analysis failed for the input text. Result: {result}"
                            )

                    else:
                        st.error(
                            "Received no valid result from the prediction function."
                        )

                except Exception as analysis_e:
                    # Show the error in the UI and dump a traceback to stdout
                    # for server-side debugging.
                    st.error(
                        f"An error occurred during text analysis: {str(analysis_e)}"
                    )
                    print(f"Full error during text input analysis: {analysis_e}")
                    import traceback

                    traceback.print_exc()

        else:
            st.warning("Please enter some text in the text area first!")
|
|
|
with tab_youtube:
    # --- Tab 2: sentiment analysis of a YouTube video's comments ---
    st.header("YouTube Comment Sentiment Analyzer")

    video_url_input = st.text_input(
        "Enter YouTube Video URL or Video ID:",
        key="youtube_url_input_key",
        placeholder="e.g., Z9kGRMglw-I or full URL",
    )

    if st.button("Analyze YouTube Comments", key="youtube_analyze_button_key"):
        if video_url_input:

            # Fetch comments and run the model (shows its own spinners and
            # errors); returns None on failure.
            analysis_results = analyze_youtube_video(video_url_input)

            if (
                analysis_results and analysis_results["summary"]
            ):
                summary = analysis_results["summary"]
                comments_data = analysis_results["comments_data"]
                video_title_display = summary.get(
                    "video_title", "Video Title Not Available"
                )

                st.markdown("---")
                st.markdown(f"### Analyzing Video: **{video_title_display}**")
                st.markdown("---")

                st.subheader("π Sentiment Summary")

                # Inline-CSS styling shared by the five summary "metric" cells.
                label_font_size = "24px"
                value_font_size = "28px"
                value_font_weight = "bold"

                positive_color = "green"
                neutral_color = "grey"
                negative_color = "red"

                # Five side-by-side cells: fetched, analyzed, pos, neu, neg.
                col_fetched, col_analyzed, col_pos, col_neu, col_neg = st.columns(5)

                with col_fetched:
                    st.markdown(
                        f"<p style='font-size: {label_font_size}; margin-bottom: 0px;'>Comments Fetched</p>",
                        unsafe_allow_html=True,
                    )
                    st.markdown(
                        f"<p style='font-size: {value_font_size}; font-weight: {value_font_weight}; margin-top: 0px;'>{summary.get('num_comments_fetched', 0)}</p>",
                        unsafe_allow_html=True,
                    )

                with col_analyzed:
                    st.markdown(
                        f"<p style='font-size: {label_font_size}; margin-bottom: 0px;'>Comments Analyzed</p>",
                        unsafe_allow_html=True,
                    )
                    st.markdown(
                        f"<p style='font-size: {value_font_size}; font-weight: {value_font_weight}; margin-top: 0px;'>{summary.get('num_comments_analyzed', 0)}</p>",
                        unsafe_allow_html=True,
                    )

                with col_pos:
                    st.markdown(
                        f"<p style='font-size: {label_font_size}; margin-bottom: 0px;'>Positive π</p>",
                        unsafe_allow_html=True,
                    )
                    st.markdown(
                        f"<p style='font-size: {value_font_size}; font-weight: {value_font_weight}; color:{positive_color}; margin-top: 0px;'>{summary.get('positive', 0)}</p>",
                        unsafe_allow_html=True,
                    )

                with col_neu:
                    st.markdown(
                        f"<p style='font-size: {label_font_size}; margin-bottom: 0px;'>Neutral π</p>",
                        unsafe_allow_html=True,
                    )
                    st.markdown(
                        f"<p style='font-size: {value_font_size}; font-weight: {value_font_weight}; color:{neutral_color}; margin-top: 0px;'>{summary.get('neutral', 0)}</p>",
                        unsafe_allow_html=True,
                    )

                with col_neg:
                    st.markdown(
                        f"<p style='font-size: {label_font_size}; margin-bottom: 0px;'>Negative π</p>",
                        unsafe_allow_html=True,
                    )
                    st.markdown(
                        f"<p style='font-size: {value_font_size}; font-weight: {value_font_weight}; color:{negative_color}; margin-top: 0px;'>{summary.get('negative', 0)}</p>",
                        unsafe_allow_html=True,
                    )

                st.markdown("---")

                # Charts only make sense when at least one comment received a
                # valid (non-error) prediction.
                if summary.get("num_valid_predictions", 0) > 0:

                    sentiment_data_for_plot = [
                        {"Sentiment": "Positive", "Count": summary.get("positive", 0)},
                        {"Sentiment": "Neutral", "Count": summary.get("neutral", 0)},
                        {"Sentiment": "Negative", "Count": summary.get("negative", 0)},
                    ]
                    sentiment_counts_df = pd.DataFrame(sentiment_data_for_plot)

                    # Drop zero-count classes so they don't clutter the charts.
                    sentiment_counts_df_for_plot = sentiment_counts_df[
                        sentiment_counts_df["Count"] > 0
                    ].copy()

                    # Fixed class colors so both charts stay consistent.
                    color_map = {
                        "Positive": "green",
                        "Neutral": "grey",
                        "Negative": "red",
                    }

                    if not sentiment_counts_df_for_plot.empty:
                        st.subheader("π Sentiment Distribution Charts")

                        fig_pie = px.pie(
                            sentiment_counts_df_for_plot,
                            values="Count",
                            names="Sentiment",
                            title="Pie Chart: Comment Sentiments",
                            color="Sentiment",
                            color_discrete_map=color_map,
                        )

                        fig_pie.update_traces(
                            textposition="inside",
                            textinfo="percent+label",
                            hovertemplate="Sentiment: %{label}<br>Count: %{value}<br>Percentage: %{percent}",
                        )

                        fig_pie.update_layout(
                            uniformtext_minsize=16, uniformtext_mode="hide"
                        )

                        st.plotly_chart(fig_pie, use_container_width=True)

                        fig_bar = px.bar(
                            sentiment_counts_df_for_plot,
                            x="Sentiment",
                            y="Count",
                            title="Bar Chart: Comment Sentiments",
                            color="Sentiment",
                            color_discrete_map=color_map,
                            labels={
                                "Count": "Number of Comments",
                                "Sentiment": "Sentiment Category",
                            },
                        )
                        st.plotly_chart(fig_bar, use_container_width=True)

                    else:
                        st.write(
                            "No sentiment data (Positive, Neutral, Negative all zero) to display in charts."
                        )
                else:
                    st.write(
                        "Not enough valid sentiment data to display distribution charts."
                    )

                # Per-comment results table with percent-formatted confidence.
                if comments_data:
                    st.subheader(
                        f"π Analyzed Comments (showing first {len(comments_data)} results)"
                    )
                    comments_display_df = pd.DataFrame(comments_data)

                    if "Confidence" in comments_display_df.columns:
                        try:
                            # Render confidences as percentages, e.g. 0.934 -> "93.4%".
                            comments_display_df["Confidence"] = comments_display_df[
                                "Confidence"
                            ].map("{:.1%}".format)
                        except (TypeError, ValueError):
                            st.warning(
                                "Could not format confidence scores."
                            )

                    st.dataframe(
                        comments_display_df, use_container_width=True, height=400
                    )
                else:
                    st.write("No comments were analyzed to display.")
        else:
            st.warning("Please enter a YouTube URL or Video ID first!")
|
|
|
# --- Tab 3: placeholder — Twitter/X support is not implemented yet ---
with tab_twitter:
    st.header("Twitter/X Post Analysis")
    st.info("This feature is currently under construction. Please check back later!")
|
|
|
|
|
|
|
|
|
|