Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -4,12 +4,10 @@ import time
|
|
4 |
from typing import List, Dict
|
5 |
import pandas as pd
|
6 |
import plotly.express as px
|
7 |
-
import plotly.graph_objects as go
|
8 |
import numpy as np
|
9 |
import networkx as nx
|
10 |
from lime.lime_text import LimeTextExplainer
|
11 |
import shap
|
12 |
-
import uuid
|
13 |
|
14 |
# ----------------- Streamlit Page Config -----------------
|
15 |
st.set_page_config(
|
@@ -28,6 +26,7 @@ st.markdown("""
|
|
28 |
.summary-box { background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; padding: 20px; border-radius: 10px; margin: 10px 0; }
|
29 |
.sentence-bar { height: 20px; border-radius: 5px; margin-bottom:5px; }
|
30 |
.sentence-text { font-size: 16px; margin-bottom:3px; }
|
|
|
31 |
</style>
|
32 |
""", unsafe_allow_html=True)
|
33 |
|
@@ -35,8 +34,8 @@ st.markdown("""
|
|
35 |
class UrduTextSummarizer:
|
36 |
def __init__(self):
|
37 |
self.urdu_stop_words = {'اور','کا','کی','کے','میں','سے','کو','نے','ہے','ہیں','تھا','تھی','تھے',
|
38 |
-
|
39 |
-
|
40 |
|
41 |
def tokenize(self, sentence: str) -> List[str]:
|
42 |
if isinstance(sentence, bytes):
|
@@ -163,7 +162,7 @@ class UrduTextSummarizer:
|
|
163 |
try:
|
164 |
words = self.tokenize(t)
|
165 |
if not words:
|
166 |
-
scores.append([0.0])
|
167 |
continue
|
168 |
temp_sent_words = [words]
|
169 |
temp_all_words = list(set(words))
|
@@ -176,27 +175,28 @@ class UrduTextSummarizer:
|
|
176 |
temp_idf = np.log(1 / (1 + temp_df))
|
177 |
temp_tfidf = temp_tf * temp_idf
|
178 |
norm = np.linalg.norm(temp_tfidf[0])
|
179 |
-
|
180 |
-
|
181 |
-
|
|
|
182 |
return np.array(scores)
|
183 |
|
184 |
# LIME Explainer
|
185 |
-
lime_explainer = LimeTextExplainer(class_names=["Score"], bow=False)
|
186 |
-
top_indices = [s['position'] - 1 for s in sorted_scores[:2]]
|
187 |
for idx in top_indices:
|
188 |
try:
|
189 |
exp = lime_explainer.explain_instance(sentences[idx], predictor, num_features=10, num_samples=100)
|
190 |
-
explanations['lime'].append({'sentence': sentences[idx], 'exp': exp.as_list()})
|
191 |
except Exception as e:
|
192 |
explanations['lime'].append({'sentence': sentences[idx], 'exp': [('Error', f'LIME failed: {str(e)}')]})
|
193 |
|
194 |
# SHAP Explainer
|
195 |
-
|
196 |
-
shap_explainer = shap.KernelExplainer(
|
197 |
for idx in top_indices:
|
198 |
try:
|
199 |
-
shap_values = shap_explainer.shap_values(sentences[idx], nsamples=100)
|
200 |
explanations['shap'].append({'sentence': sentences[idx], 'shap_values': shap_values})
|
201 |
except Exception as e:
|
202 |
explanations['shap'].append({'sentence': sentences[idx], 'shap_values': [0.0], 'error': str(e)})
|
@@ -231,7 +231,8 @@ if st.button("خلاصہ تیار کریں") and user_input.strip():
|
|
231 |
for s in sorted_sents:
|
232 |
bar_width = min(int(s['score']*100), 100)
|
233 |
st.markdown(f"<div class='sentence-text'>{s['sentence']}</div>"
|
234 |
-
f"<div class='sentence-bar' style='width:{bar_width}%; background-color:#667eea'></div>"
|
|
|
235 |
|
236 |
# --- Keywords Tab ---
|
237 |
with tabs[2]:
|
@@ -243,22 +244,8 @@ if st.button("خلاصہ تیار کریں") and user_input.strip():
|
|
243 |
|
244 |
# --- Graphs Tab ---
|
245 |
with tabs[3]:
|
246 |
-
|
247 |
-
|
248 |
-
heatmap = go.Figure(data=go.Heatmap(
|
249 |
-
z=[df_sent['score'].tolist()],
|
250 |
-
x=[f"جملہ {i}" for i in df_sent['position']],
|
251 |
-
y=["Score"],
|
252 |
-
colorscale='Viridis'
|
253 |
-
))
|
254 |
-
heatmap.update_layout(title="Sentence Score Intensity")
|
255 |
-
st.plotly_chart(heatmap, use_container_width=True, key="sentence_heatmap")
|
256 |
-
|
257 |
-
word_lengths = [len(w) for w in re.sub(r'[۔،؟!؛:]', '', user_input).split()]
|
258 |
-
fig_words = px.histogram(word_lengths, nbins=20, title="الفاظ کی لمبائی کی تقسیم",
|
259 |
-
labels={'value':'الفاظ کی لمبائی','count':'تعداد'})
|
260 |
-
st.plotly_chart(fig_words, use_container_width=True, key="word_histogram")
|
261 |
-
|
262 |
# --- Explainability Tab ---
|
263 |
with tabs[4]:
|
264 |
st.subheader("LIME Explanations (Word Contributions)")
|
|
|
4 |
from typing import List, Dict
|
5 |
import pandas as pd
|
6 |
import plotly.express as px
|
|
|
7 |
import numpy as np
|
8 |
import networkx as nx
|
9 |
from lime.lime_text import LimeTextExplainer
|
10 |
import shap
|
|
|
11 |
|
12 |
# ----------------- Streamlit Page Config -----------------
|
13 |
st.set_page_config(
|
|
|
26 |
.summary-box { background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; padding: 20px; border-radius: 10px; margin: 10px 0; }
|
27 |
.sentence-bar { height: 20px; border-radius: 5px; margin-bottom:5px; }
|
28 |
.sentence-text { font-size: 16px; margin-bottom:3px; }
|
29 |
+
.score-text { font-size: 14px; color: #333; margin-top: 5px; direction: rtl; }
|
30 |
</style>
|
31 |
""", unsafe_allow_html=True)
|
32 |
|
|
|
34 |
class UrduTextSummarizer:
|
35 |
def __init__(self):
|
36 |
self.urdu_stop_words = {'اور','کا','کی','کے','میں','سے','کو','نے','ہے','ہیں','تھا','تھی','تھے',
|
37 |
+
'گا','گی','گے','کہ','جو','یہ','وہ','اس','ان','پر','کر','کرنا','کیا',
|
38 |
+
'ہو','ہوا','ہوئی','ہوئے','بھی','تو','ہی','لیے','ساتھ','بعد','پہلے'}
|
39 |
|
40 |
def tokenize(self, sentence: str) -> List[str]:
|
41 |
if isinstance(sentence, bytes):
|
|
|
162 |
try:
|
163 |
words = self.tokenize(t)
|
164 |
if not words:
|
165 |
+
scores.append([0.0, 1.0])
|
166 |
continue
|
167 |
temp_sent_words = [words]
|
168 |
temp_all_words = list(set(words))
|
|
|
175 |
temp_idf = np.log(1 / (1 + temp_df))
|
176 |
temp_tfidf = temp_tf * temp_idf
|
177 |
norm = np.linalg.norm(temp_tfidf[0])
|
178 |
+
score = min(max(norm, 0.0), 1.0)
|
179 |
+
scores.append([score, 1.0 - score])
|
180 |
+
except Exception:
|
181 |
+
scores.append([0.0, 1.0])
|
182 |
return np.array(scores)
|
183 |
|
184 |
# LIME Explainer
|
185 |
+
lime_explainer = LimeTextExplainer(class_names=["Score", "Not Score"], bow=False)
|
186 |
+
top_indices = [s['position'] - 1 for s in sorted_scores[:2]]
|
187 |
for idx in top_indices:
|
188 |
try:
|
189 |
exp = lime_explainer.explain_instance(sentences[idx], predictor, num_features=10, num_samples=100)
|
190 |
+
explanations['lime'].append({'sentence': sentences[idx], 'exp': exp.as_list(label=0)})
|
191 |
except Exception as e:
|
192 |
explanations['lime'].append({'sentence': sentences[idx], 'exp': [('Error', f'LIME failed: {str(e)}')]})
|
193 |
|
194 |
# SHAP Explainer
|
195 |
+
background_texts = sentences[:min(10, len(sentences))]
|
196 |
+
shap_explainer = shap.KernelExplainer(predictor, background_texts)
|
197 |
for idx in top_indices:
|
198 |
try:
|
199 |
+
shap_values = shap_explainer.shap_values(sentences[idx], nsamples=100)[0]
|
200 |
explanations['shap'].append({'sentence': sentences[idx], 'shap_values': shap_values})
|
201 |
except Exception as e:
|
202 |
explanations['shap'].append({'sentence': sentences[idx], 'shap_values': [0.0], 'error': str(e)})
|
|
|
231 |
for s in sorted_sents:
|
232 |
bar_width = min(int(s['score']*100), 100)
|
233 |
st.markdown(f"<div class='sentence-text'>{s['sentence']}</div>"
|
234 |
+
f"<div class='sentence-bar' style='width:{bar_width}%; background-color:#667eea'></div>"
|
235 |
+
f"<div class='score-text'>اسکور: {s['score']:.2f}</div>", unsafe_allow_html=True)
|
236 |
|
237 |
# --- Keywords Tab ---
|
238 |
with tabs[2]:
|
|
|
244 |
|
245 |
# --- Graphs Tab ---
|
246 |
with tabs[3]:
|
247 |
+
st.info("اس ٹیب میں فی الحال کوئی گراف شامل نہیں ہے۔")
|
248 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
249 |
# --- Explainability Tab ---
|
250 |
with tabs[4]:
|
251 |
st.subheader("LIME Explanations (Word Contributions)")
|