Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,12 +1,11 @@
|
|
| 1 |
"""
|
| 2 |
Multilingual Sentiment Analysis (English • Urdu • Roman Urdu)
|
| 3 |
-------------------------------------------------------------
|
| 4 |
-
|
| 5 |
-
β’
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
• Saves each query to sentiment_logs.xlsx (downloadable)
|
| 10 |
"""
|
| 11 |
|
| 12 |
import os
|
|
@@ -16,7 +15,6 @@ import gradio as gr
|
|
| 16 |
from transformers import pipeline
|
| 17 |
|
| 18 |
# -------- Model & Pipeline --------
|
| 19 |
-
# This model supports many languages (incl. English/Urdu/Roman Urdu)
|
| 20 |
MODEL_NAME = "nlptown/bert-base-multilingual-uncased-sentiment"
|
| 21 |
clf = pipeline("sentiment-analysis", model=MODEL_NAME)
|
| 22 |
|
|
@@ -24,47 +22,39 @@ clf = pipeline("sentiment-analysis", model=MODEL_NAME)
|
|
| 24 |
LOG_PATH = "sentiment_logs.xlsx"
|
| 25 |
if not os.path.exists(LOG_PATH):
|
| 26 |
pd.DataFrame(columns=[
|
| 27 |
-
"timestamp", "
|
|
|
|
| 28 |
"stars_probs", "top_star_label"
|
| 29 |
]).to_excel(LOG_PATH, index=False)
|
| 30 |
|
|
|
|
| 31 |
def _aggregate_to_3class(star_scores):
|
| 32 |
-
"""
|
| 33 |
-
star_scores: list of dicts like:
|
| 34 |
-
[{'label': '1 star', 'score': 0.05}, ..., {'label': '5 stars', 'score': 0.6}]
|
| 35 |
-
Returns: (pred_label, confidence, probs_dict, top_star_label)
|
| 36 |
-
"""
|
| 37 |
-
# Normalize keys (some labels are singular/plural)
|
| 38 |
scores = {d["label"].lower(): float(d["score"]) for d in star_scores}
|
| 39 |
-
s1
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
neu = s3
|
| 47 |
-
pos = s4 + s5
|
| 48 |
|
|
|
|
| 49 |
probs3 = {"Negative": neg, "Neutral": neu, "Positive": pos}
|
| 50 |
pred_label = max(probs3, key=probs3.get)
|
| 51 |
confidence = probs3[pred_label]
|
| 52 |
|
| 53 |
-
# Top star label for reference
|
| 54 |
top_star_label = max(
|
| 55 |
["1 star", "2 stars", "3 stars", "4 stars", "5 stars"],
|
| 56 |
key=lambda k: {"1 star": s1, "2 stars": s2, "3 stars": s3, "4 stars": s4, "5 stars": s5}[k]
|
| 57 |
)
|
| 58 |
-
|
| 59 |
return pred_label, confidence, probs3, top_star_label
|
| 60 |
|
| 61 |
-
|
|
|
|
| 62 |
if not text or not text.strip():
|
| 63 |
return "β Please enter some text.", "", "", LOG_PATH
|
| 64 |
|
| 65 |
-
|
| 66 |
-
star_results = clf(text, return_all_scores=True)[0] # list of 5 dicts
|
| 67 |
-
|
| 68 |
pred_label, conf, probs3, top_star = _aggregate_to_3class(star_results)
|
| 69 |
|
| 70 |
polarity = {
|
|
@@ -73,17 +63,19 @@ def analyze(text):
|
|
| 73 |
"Negative": "βΉοΈ Negative",
|
| 74 |
}[pred_label]
|
| 75 |
|
| 76 |
-
# Log
|
| 77 |
try:
|
| 78 |
df = pd.read_excel(LOG_PATH)
|
| 79 |
except Exception:
|
| 80 |
df = pd.DataFrame(columns=[
|
| 81 |
-
"timestamp", "
|
|
|
|
| 82 |
"stars_probs", "top_star_label"
|
| 83 |
])
|
| 84 |
|
| 85 |
new_row = {
|
| 86 |
"timestamp": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S UTC"),
|
|
|
|
| 87 |
"text": text,
|
| 88 |
"predicted_label_3class": pred_label,
|
| 89 |
"confidence_3class": round(conf, 4),
|
|
@@ -93,13 +85,38 @@ def analyze(text):
|
|
| 93 |
df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
|
| 94 |
df.to_excel(LOG_PATH, index=False)
|
| 95 |
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
|
| 104 |
# -------- Gradio UI --------
|
| 105 |
with gr.Blocks() as demo:
|
|
@@ -109,15 +126,31 @@ with gr.Blocks() as demo:
|
|
| 109 |
"Model: `nlptown/bert-base-multilingual-uncased-sentiment` (mapped from 5β
β 3 classes)"
|
| 110 |
)
|
| 111 |
|
| 112 |
-
|
| 113 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
out_pol = gr.Textbox(label="Polarity")
|
| 118 |
-
out_file = gr.File(label="Download logs (.xlsx)")
|
| 119 |
|
| 120 |
-
|
|
|
|
| 121 |
|
| 122 |
if __name__ == "__main__":
|
| 123 |
-
demo.launch()
|
|
|
|
| 1 |
"""
|
| 2 |
Multilingual Sentiment Analysis (English • Urdu • Roman Urdu)
|
| 3 |
-------------------------------------------------------------
|
| 4 |
+
Features:
|
| 5 |
+
• Single text sentiment analysis with language hint.
|
| 6 |
+
• Batch analysis from CSV/XLSX file.
|
| 7 |
+
• 3-class output (Positive / Neutral / Negative) aggregated from 5-star scores.
|
| 8 |
+
• Saves logs to sentiment_logs.xlsx.
|
|
|
|
| 9 |
"""
|
| 10 |
|
| 11 |
import os
|
|
|
|
| 15 |
from transformers import pipeline
|
| 16 |
|
| 17 |
# -------- Model & Pipeline --------
|
|
|
|
| 18 |
MODEL_NAME = "nlptown/bert-base-multilingual-uncased-sentiment"
|
| 19 |
clf = pipeline("sentiment-analysis", model=MODEL_NAME)
|
| 20 |
|
|
|
|
| 22 |
LOG_PATH = "sentiment_logs.xlsx"
|
| 23 |
if not os.path.exists(LOG_PATH):
|
| 24 |
pd.DataFrame(columns=[
|
| 25 |
+
"timestamp", "language_hint", "text",
|
| 26 |
+
"predicted_label_3class", "confidence_3class",
|
| 27 |
"stars_probs", "top_star_label"
|
| 28 |
]).to_excel(LOG_PATH, index=False)
|
| 29 |
|
| 30 |
+
# -------- Helper function: aggregate 5★ → 3-class --------
|
| 31 |
def _aggregate_to_3class(star_scores):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
scores = {d["label"].lower(): float(d["score"]) for d in star_scores}
|
| 33 |
+
s1, s2, s3, s4, s5 = (
|
| 34 |
+
scores.get("1 star", 0.0),
|
| 35 |
+
scores.get("2 stars", 0.0),
|
| 36 |
+
scores.get("3 stars", 0.0),
|
| 37 |
+
scores.get("4 stars", 0.0),
|
| 38 |
+
scores.get("5 stars", 0.0),
|
| 39 |
+
)
|
|
|
|
|
|
|
| 40 |
|
| 41 |
+
neg, neu, pos = s1 + s2, s3, s4 + s5
|
| 42 |
probs3 = {"Negative": neg, "Neutral": neu, "Positive": pos}
|
| 43 |
pred_label = max(probs3, key=probs3.get)
|
| 44 |
confidence = probs3[pred_label]
|
| 45 |
|
|
|
|
| 46 |
top_star_label = max(
|
| 47 |
["1 star", "2 stars", "3 stars", "4 stars", "5 stars"],
|
| 48 |
key=lambda k: {"1 star": s1, "2 stars": s2, "3 stars": s3, "4 stars": s4, "5 stars": s5}[k]
|
| 49 |
)
|
|
|
|
| 50 |
return pred_label, confidence, probs3, top_star_label
|
| 51 |
|
| 52 |
+
# -------- Single text analysis --------
|
| 53 |
+
def analyze_single(text, lang_hint):
|
| 54 |
if not text or not text.strip():
|
| 55 |
return "β Please enter some text.", "", "", LOG_PATH
|
| 56 |
|
| 57 |
+
star_results = clf(text, return_all_scores=True)[0]
|
|
|
|
|
|
|
| 58 |
pred_label, conf, probs3, top_star = _aggregate_to_3class(star_results)
|
| 59 |
|
| 60 |
polarity = {
|
|
|
|
| 63 |
"Negative": "βΉοΈ Negative",
|
| 64 |
}[pred_label]
|
| 65 |
|
| 66 |
+
# Log
|
| 67 |
try:
|
| 68 |
df = pd.read_excel(LOG_PATH)
|
| 69 |
except Exception:
|
| 70 |
df = pd.DataFrame(columns=[
|
| 71 |
+
"timestamp", "language_hint", "text",
|
| 72 |
+
"predicted_label_3class", "confidence_3class",
|
| 73 |
"stars_probs", "top_star_label"
|
| 74 |
])
|
| 75 |
|
| 76 |
new_row = {
|
| 77 |
"timestamp": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S UTC"),
|
| 78 |
+
"language_hint": lang_hint,
|
| 79 |
"text": text,
|
| 80 |
"predicted_label_3class": pred_label,
|
| 81 |
"confidence_3class": round(conf, 4),
|
|
|
|
| 85 |
df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
|
| 86 |
df.to_excel(LOG_PATH, index=False)
|
| 87 |
|
| 88 |
+
return f"Sentiment: {pred_label}", f"Confidence: {conf:.3f}", f"Polarity: {polarity}", LOG_PATH
|
| 89 |
+
|
| 90 |
+
# -------- Batch analysis --------
|
| 91 |
+
def analyze_batch(file, lang_hint):
    """Classify every row of an uploaded CSV/Excel file and save the results.

    Args:
        file: The upload. Either a filesystem path (``str`` or
            ``os.PathLike``) or an object exposing the path as ``.name``.
            ``None`` means nothing was uploaded.
        lang_hint: Language chosen in the UI. Accepted so the signature
            matches the click handler's inputs; not used by this function.

    Returns:
        Tuple ``(status_message, output_path)``. ``output_path`` is
        ``"batch_results.xlsx"`` on success and ``None`` when the upload is
        missing, has an unsupported extension, or lacks a ``text`` column.
    """
    # NOTE(review): the leading "β" in the three rejection messages looks like
    # a mis-encoded symbol; it is kept as found because the intended glyph
    # cannot be determined from this file.
    if file is None:
        return "β Please upload a CSV/XLSX file.", None

    # Accept a plain path as well as a wrapper object carrying ``.name``.
    if isinstance(file, (str, os.PathLike)):
        path = os.fspath(file)
    else:
        path = file.name

    ext = os.path.splitext(path)[-1].lower()
    if ext == ".csv":
        df = pd.read_csv(path)
    elif ext in (".xls", ".xlsx"):
        df = pd.read_excel(path)
    else:
        return "β Only CSV or Excel files are supported.", None

    if "text" not in df.columns:
        return "β The file must contain a 'text' column.", None

    # Collect one list per output column. Unlike unpacking ``zip(*results)``,
    # this also works when the file has a header but no data rows.
    labels, confidences, top_stars = [], [], []
    for value in df["text"]:
        if not isinstance(value, str) or not value.strip():
            # Blank cells and non-string values are reported, not classified.
            labels.append("N/A")
            confidences.append(0.0)
            top_stars.append("Invalid text")
            continue
        star_results = clf(value, return_all_scores=True)[0]
        pred_label, conf, _probs3, top_star = _aggregate_to_3class(star_results)
        labels.append(pred_label)
        confidences.append(conf)
        top_stars.append(top_star)

    df["predicted_label_3class"] = labels
    df["confidence_3class"] = confidences
    df["top_star_label"] = top_stars

    out_path = "batch_results.xlsx"
    df.to_excel(out_path, index=False)

    return "✅ Batch analysis complete.", out_path
|
| 120 |
|
| 121 |
# -------- Gradio UI --------
|
| 122 |
with gr.Blocks() as demo:
|
|
|
|
| 126 |
"Model: `nlptown/bert-base-multilingual-uncased-sentiment` (mapped from 5β
β 3 classes)"
|
| 127 |
)
|
| 128 |
|
| 129 |
+
with gr.Tab("πΉ Single Text"):
|
| 130 |
+
user_text = gr.Textbox(label="Enter text", placeholder="Type in English, Urdu, or Roman Urdu...")
|
| 131 |
+
lang_dropdown = gr.Dropdown(["English", "Urdu", "Roman Urdu"], label="Language Hint", value="English")
|
| 132 |
+
btn = gr.Button("Analyze")
|
| 133 |
+
|
| 134 |
+
out_sent = gr.Textbox(label="Sentiment")
|
| 135 |
+
out_conf = gr.Textbox(label="Confidence (0β1)")
|
| 136 |
+
out_pol = gr.Textbox(label="Polarity")
|
| 137 |
+
out_file = gr.File(label="Download logs (.xlsx)")
|
| 138 |
+
|
| 139 |
+
btn.click(analyze_single, inputs=[user_text, lang_dropdown],
|
| 140 |
+
outputs=[out_sent, out_conf, out_pol, out_file])
|
| 141 |
+
|
| 142 |
+
with gr.Tab("πΉ Batch Upload"):
|
| 143 |
+
gr.Markdown("Upload a CSV/XLSX file with a **'text'** column for batch sentiment analysis.")
|
| 144 |
+
file_in = gr.File(label="Upload CSV/XLSX", file_types=[".csv", ".xlsx"])
|
| 145 |
+
lang_dropdown_batch = gr.Dropdown(["English", "Urdu", "Roman Urdu"],
|
| 146 |
+
label="Language Hint", value="English")
|
| 147 |
+
btn_batch = gr.Button("Analyze Batch")
|
| 148 |
|
| 149 |
+
batch_status = gr.Textbox(label="Status")
|
| 150 |
+
batch_file = gr.File(label="Download Batch Results")
|
|
|
|
|
|
|
| 151 |
|
| 152 |
+
btn_batch.click(analyze_batch, inputs=[file_in, lang_dropdown_batch],
|
| 153 |
+
outputs=[batch_status, batch_file])
|
| 154 |
|
| 155 |
if __name__ == "__main__":
|
| 156 |
+
demo.launch()
|