Subayyal committed on
Commit
4b53dcb
·
verified ·
1 Parent(s): b621a50

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -31
app.py CHANGED
@@ -4,12 +4,10 @@ import time
4
  from typing import List, Dict
5
  import pandas as pd
6
  import plotly.express as px
7
- import plotly.graph_objects as go
8
  import numpy as np
9
  import networkx as nx
10
  from lime.lime_text import LimeTextExplainer
11
  import shap
12
- import uuid
13
 
14
  # ----------------- Streamlit Page Config -----------------
15
  st.set_page_config(
@@ -28,6 +26,7 @@ st.markdown("""
28
  .summary-box { background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; padding: 20px; border-radius: 10px; margin: 10px 0; }
29
  .sentence-bar { height: 20px; border-radius: 5px; margin-bottom:5px; }
30
  .sentence-text { font-size: 16px; margin-bottom:3px; }
 
31
  </style>
32
  """, unsafe_allow_html=True)
33
 
@@ -35,8 +34,8 @@ st.markdown("""
35
  class UrduTextSummarizer:
36
  def __init__(self):
37
  self.urdu_stop_words = {'اور','کا','کی','کے','میں','سے','کو','نے','ہے','ہیں','تھا','تھی','تھے',
38
- 'گا','گی','گے','کہ','جو','یہ','وہ','اس','ان','پر','کر','کرنا','کیا',
39
- 'ہو','ہوا','ہوئی','ہوئے','بھی','تو','ہی','لیے','ساتھ','بعد','پہلے'}
40
 
41
  def tokenize(self, sentence: str) -> List[str]:
42
  if isinstance(sentence, bytes):
@@ -163,7 +162,7 @@ class UrduTextSummarizer:
163
  try:
164
  words = self.tokenize(t)
165
  if not words:
166
- scores.append([0.0])
167
  continue
168
  temp_sent_words = [words]
169
  temp_all_words = list(set(words))
@@ -176,27 +175,28 @@ class UrduTextSummarizer:
176
  temp_idf = np.log(1 / (1 + temp_df))
177
  temp_tfidf = temp_tf * temp_idf
178
  norm = np.linalg.norm(temp_tfidf[0])
179
- scores.append([norm])
180
- except Exception as e:
181
- scores.append([0.0])
 
182
  return np.array(scores)
183
 
184
  # LIME Explainer
185
- lime_explainer = LimeTextExplainer(class_names=["Score"], bow=False)
186
- top_indices = [s['position'] - 1 for s in sorted_scores[:2]] # Top 2 sentences
187
  for idx in top_indices:
188
  try:
189
  exp = lime_explainer.explain_instance(sentences[idx], predictor, num_features=10, num_samples=100)
190
- explanations['lime'].append({'sentence': sentences[idx], 'exp': exp.as_list()})
191
  except Exception as e:
192
  explanations['lime'].append({'sentence': sentences[idx], 'exp': [('Error', f'LIME failed: {str(e)}')]})
193
 
194
  # SHAP Explainer
195
- background = shap.kmeans(np.array([tfidf[i] for i in range(min(10, num_sents))]), 3)
196
- shap_explainer = shap.KernelExplainer(lambda x: predictor(x)[:, 0], background)
197
  for idx in top_indices:
198
  try:
199
- shap_values = shap_explainer.shap_values(sentences[idx], nsamples=100)
200
  explanations['shap'].append({'sentence': sentences[idx], 'shap_values': shap_values})
201
  except Exception as e:
202
  explanations['shap'].append({'sentence': sentences[idx], 'shap_values': [0.0], 'error': str(e)})
@@ -231,7 +231,8 @@ if st.button("خلاصہ تیار کریں") and user_input.strip():
231
  for s in sorted_sents:
232
  bar_width = min(int(s['score']*100), 100)
233
  st.markdown(f"<div class='sentence-text'>{s['sentence']}</div>"
234
- f"<div class='sentence-bar' style='width:{bar_width}%; background-color:#667eea'></div>", unsafe_allow_html=True)
 
235
 
236
  # --- Keywords Tab ---
237
  with tabs[2]:
@@ -243,22 +244,8 @@ if st.button("خلاصہ تیار کریں") and user_input.strip():
243
 
244
  # --- Graphs Tab ---
245
  with tabs[3]:
246
- df_sent = pd.DataFrame(result['sentences'])
247
- df_sent['included_label'] = df_sent['included'].apply(lambda x: 'شامل' if x else 'خارج')
248
- heatmap = go.Figure(data=go.Heatmap(
249
- z=[df_sent['score'].tolist()],
250
- x=[f"جملہ {i}" for i in df_sent['position']],
251
- y=["Score"],
252
- colorscale='Viridis'
253
- ))
254
- heatmap.update_layout(title="Sentence Score Intensity")
255
- st.plotly_chart(heatmap, use_container_width=True, key="sentence_heatmap")
256
-
257
- word_lengths = [len(w) for w in re.sub(r'[۔،؟!؛:]', '', user_input).split()]
258
- fig_words = px.histogram(word_lengths, nbins=20, title="الفاظ کی لمبائی کی تقسیم",
259
- labels={'value':'الفاظ کی لمبائی','count':'تعداد'})
260
- st.plotly_chart(fig_words, use_container_width=True, key="word_histogram")
261
-
262
  # --- Explainability Tab ---
263
  with tabs[4]:
264
  st.subheader("LIME Explanations (Word Contributions)")
 
4
  from typing import List, Dict
5
  import pandas as pd
6
  import plotly.express as px
 
7
  import numpy as np
8
  import networkx as nx
9
  from lime.lime_text import LimeTextExplainer
10
  import shap
 
11
 
12
  # ----------------- Streamlit Page Config -----------------
13
  st.set_page_config(
 
26
  .summary-box { background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; padding: 20px; border-radius: 10px; margin: 10px 0; }
27
  .sentence-bar { height: 20px; border-radius: 5px; margin-bottom:5px; }
28
  .sentence-text { font-size: 16px; margin-bottom:3px; }
29
+ .score-text { font-size: 14px; color: #333; margin-top: 5px; direction: rtl; }
30
  </style>
31
  """, unsafe_allow_html=True)
32
 
 
34
  class UrduTextSummarizer:
35
  def __init__(self):
36
  self.urdu_stop_words = {'اور','کا','کی','کے','میں','سے','کو','نے','ہے','ہیں','تھا','تھی','تھے',
37
+ 'گا','گی','گے','کہ','جو','یہ','وہ','اس','ان','پر','کر','کرنا','کیا',
38
+ 'ہو','ہوا','ہوئی','ہوئے','بھی','تو','ہی','لیے','ساتھ','بعد','پہلے'}
39
 
40
  def tokenize(self, sentence: str) -> List[str]:
41
  if isinstance(sentence, bytes):
 
162
  try:
163
  words = self.tokenize(t)
164
  if not words:
165
+ scores.append([0.0, 1.0])
166
  continue
167
  temp_sent_words = [words]
168
  temp_all_words = list(set(words))
 
175
  temp_idf = np.log(1 / (1 + temp_df))
176
  temp_tfidf = temp_tf * temp_idf
177
  norm = np.linalg.norm(temp_tfidf[0])
178
+ score = min(max(norm, 0.0), 1.0)
179
+ scores.append([score, 1.0 - score])
180
+ except Exception:
181
+ scores.append([0.0, 1.0])
182
  return np.array(scores)
183
 
184
  # LIME Explainer
185
+ lime_explainer = LimeTextExplainer(class_names=["Score", "Not Score"], bow=False)
186
+ top_indices = [s['position'] - 1 for s in sorted_scores[:2]]
187
  for idx in top_indices:
188
  try:
189
  exp = lime_explainer.explain_instance(sentences[idx], predictor, num_features=10, num_samples=100)
190
+ explanations['lime'].append({'sentence': sentences[idx], 'exp': exp.as_list(label=0)})
191
  except Exception as e:
192
  explanations['lime'].append({'sentence': sentences[idx], 'exp': [('Error', f'LIME failed: {str(e)}')]})
193
 
194
  # SHAP Explainer
195
+ background_texts = sentences[:min(10, len(sentences))]
196
+ shap_explainer = shap.KernelExplainer(predictor, background_texts)
197
  for idx in top_indices:
198
  try:
199
+ shap_values = shap_explainer.shap_values(sentences[idx], nsamples=100)[0]
200
  explanations['shap'].append({'sentence': sentences[idx], 'shap_values': shap_values})
201
  except Exception as e:
202
  explanations['shap'].append({'sentence': sentences[idx], 'shap_values': [0.0], 'error': str(e)})
 
231
  for s in sorted_sents:
232
  bar_width = min(int(s['score']*100), 100)
233
  st.markdown(f"<div class='sentence-text'>{s['sentence']}</div>"
234
+ f"<div class='sentence-bar' style='width:{bar_width}%; background-color:#667eea'></div>"
235
+ f"<div class='score-text'>اسکور: {s['score']:.2f}</div>", unsafe_allow_html=True)
236
 
237
  # --- Keywords Tab ---
238
  with tabs[2]:
 
244
 
245
  # --- Graphs Tab ---
246
  with tabs[3]:
247
+ st.info("اس ٹیب میں فی الحال کوئی گراف شامل نہیں ہے۔")
248
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
249
  # --- Explainability Tab ---
250
  with tabs[4]:
251
  st.subheader("LIME Explanations (Word Contributions)")