Spaces:

Subayyal
/

Urdu-Summarizer

Running

App Files Community

Subayyal committed 4 days ago

Commit

4b53dcb

verified ·

1 Parent(s): b621a50

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -31

app.py CHANGED Viewed

@@ -4,12 +4,10 @@ import time
 from typing import List, Dict
 import pandas as pd
 import plotly.express as px
-import plotly.graph_objects as go
 import numpy as np
 import networkx as nx
 from lime.lime_text import LimeTextExplainer
 import shap
-import uuid
 # ----------------- Streamlit Page Config -----------------
 st.set_page_config(
@@ -28,6 +26,7 @@ st.markdown("""
 .summary-box { background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; padding: 20px; border-radius: 10px; margin: 10px 0; }
 .sentence-bar { height: 20px; border-radius: 5px; margin-bottom:5px; }
 .sentence-text { font-size: 16px; margin-bottom:3px; }
 </style>
 """, unsafe_allow_html=True)
@@ -35,8 +34,8 @@ st.markdown("""
 class UrduTextSummarizer:
     def __init__(self):
         self.urdu_stop_words = {'اور','کا','کی','کے','میں','سے','کو','نے','ہے','ہیں','تھا','تھی','تھے',
-                                'گا','گی','گے','کہ','جو','یہ','وہ','اس','ان','پر','کر','کرنا','کیا',
-                                'ہو','ہوا','ہوئی','ہوئے','بھی','تو','ہی','لیے','ساتھ','بعد','پہلے'}
     def tokenize(self, sentence: str) -> List[str]:
         if isinstance(sentence, bytes):
@@ -163,7 +162,7 @@ class UrduTextSummarizer:
                 try:
                     words = self.tokenize(t)
                     if not words:
-                        scores.append([0.0])
                         continue
                     temp_sent_words = [words]
                     temp_all_words = list(set(words))
@@ -176,27 +175,28 @@ class UrduTextSummarizer:
                     temp_idf = np.log(1 / (1 + temp_df))
                     temp_tfidf = temp_tf * temp_idf
                     norm = np.linalg.norm(temp_tfidf[0])
-                    scores.append([norm])
-                except Exception as e:
-                    scores.append([0.0])
             return np.array(scores)
         # LIME Explainer
-        lime_explainer = LimeTextExplainer(class_names=["Score"], bow=False)
-        top_indices = [s['position'] - 1 for s in sorted_scores[:2]]  # Top 2 sentences
         for idx in top_indices:
             try:
                 exp = lime_explainer.explain_instance(sentences[idx], predictor, num_features=10, num_samples=100)
-                explanations['lime'].append({'sentence': sentences[idx], 'exp': exp.as_list()})
             except Exception as e:
                 explanations['lime'].append({'sentence': sentences[idx], 'exp': [('Error', f'LIME failed: {str(e)}')]})
         # SHAP Explainer
-        background = shap.kmeans(np.array([tfidf[i] for i in range(min(10, num_sents))]), 3)
-        shap_explainer = shap.KernelExplainer(lambda x: predictor(x)[:, 0], background)
         for idx in top_indices:
             try:
-                shap_values = shap_explainer.shap_values(sentences[idx], nsamples=100)
                 explanations['shap'].append({'sentence': sentences[idx], 'shap_values': shap_values})
             except Exception as e:
                 explanations['shap'].append({'sentence': sentences[idx], 'shap_values': [0.0], 'error': str(e)})
@@ -231,7 +231,8 @@ if st.button("خلاصہ تیار کریں") and user_input.strip():
         for s in sorted_sents:
             bar_width = min(int(s['score']*100), 100)
             st.markdown(f"<div class='sentence-text'>{s['sentence']}</div>"
-                        f"<div class='sentence-bar' style='width:{bar_width}%; background-color:#667eea'></div>", unsafe_allow_html=True)
     # --- Keywords Tab ---
     with tabs[2]:
@@ -243,22 +244,8 @@ if st.button("خلاصہ تیار کریں") and user_input.strip():
     # --- Graphs Tab ---
     with tabs[3]:
-        df_sent = pd.DataFrame(result['sentences'])
-        df_sent['included_label'] = df_sent['included'].apply(lambda x: 'شامل' if x else 'خارج')
-        heatmap = go.Figure(data=go.Heatmap(
-            z=[df_sent['score'].tolist()],
-            x=[f"جملہ {i}" for i in df_sent['position']],
-            y=["Score"],
-            colorscale='Viridis'
-        ))
-        heatmap.update_layout(title="Sentence Score Intensity")
-        st.plotly_chart(heatmap, use_container_width=True, key="sentence_heatmap")
-        word_lengths = [len(w) for w in re.sub(r'[۔،؟!؛:]', '', user_input).split()]
-        fig_words = px.histogram(word_lengths, nbins=20, title="الفاظ کی لمبائی کی تقسیم",
-                                 labels={'value':'الفاظ کی لمبائی','count':'تعداد'})
-        st.plotly_chart(fig_words, use_container_width=True, key="word_histogram")
     # --- Explainability Tab ---
     with tabs[4]:
         st.subheader("LIME Explanations (Word Contributions)")

 from typing import List, Dict
 import pandas as pd
 import plotly.express as px
 import numpy as np
 import networkx as nx
 from lime.lime_text import LimeTextExplainer
 import shap
 # ----------------- Streamlit Page Config -----------------
 st.set_page_config(
 .summary-box { background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; padding: 20px; border-radius: 10px; margin: 10px 0; }
 .sentence-bar { height: 20px; border-radius: 5px; margin-bottom:5px; }
 .sentence-text { font-size: 16px; margin-bottom:3px; }
+.score-text { font-size: 14px; color: #333; margin-top: 5px; direction: rtl; }
 </style>
 """, unsafe_allow_html=True)
 class UrduTextSummarizer:
     def __init__(self):
         self.urdu_stop_words = {'اور','کا','کی','کے','میں','سے','کو','نے','ہے','ہیں','تھا','تھی','تھے',
+                            'گا','گی','گے','کہ','جو','یہ','وہ','اس','ان','پر','کر','کرنا','کیا',
+                            'ہو','ہوا','ہوئی','ہوئے','بھی','تو','ہی','لیے','ساتھ','بعد','پہلے'}
     def tokenize(self, sentence: str) -> List[str]:
         if isinstance(sentence, bytes):
                 try:
                     words = self.tokenize(t)
                     if not words:
+                        scores.append([0.0, 1.0])
                         continue
                     temp_sent_words = [words]
                     temp_all_words = list(set(words))
                     temp_idf = np.log(1 / (1 + temp_df))
                     temp_tfidf = temp_tf * temp_idf
                     norm = np.linalg.norm(temp_tfidf[0])
+                    score = min(max(norm, 0.0), 1.0)
+                    scores.append([score, 1.0 - score])
+                except Exception:
+                    scores.append([0.0, 1.0])
             return np.array(scores)
         # LIME Explainer
+        lime_explainer = LimeTextExplainer(class_names=["Score", "Not Score"], bow=False)
+        top_indices = [s['position'] - 1 for s in sorted_scores[:2]]
         for idx in top_indices:
             try:
                 exp = lime_explainer.explain_instance(sentences[idx], predictor, num_features=10, num_samples=100)
+                explanations['lime'].append({'sentence': sentences[idx], 'exp': exp.as_list(label=0)})
             except Exception as e:
                 explanations['lime'].append({'sentence': sentences[idx], 'exp': [('Error', f'LIME failed: {str(e)}')]})
         # SHAP Explainer
+        background_texts = sentences[:min(10, len(sentences))]
+        shap_explainer = shap.KernelExplainer(predictor, background_texts)
         for idx in top_indices:
             try:
+                shap_values = shap_explainer.shap_values(sentences[idx], nsamples=100)[0]
                 explanations['shap'].append({'sentence': sentences[idx], 'shap_values': shap_values})
             except Exception as e:
                 explanations['shap'].append({'sentence': sentences[idx], 'shap_values': [0.0], 'error': str(e)})
         for s in sorted_sents:
             bar_width = min(int(s['score']*100), 100)
             st.markdown(f"<div class='sentence-text'>{s['sentence']}</div>"
+                        f"<div class='sentence-bar' style='width:{bar_width}%; background-color:#667eea'></div>"
+                        f"<div class='score-text'>اسکور: {s['score']:.2f}</div>", unsafe_allow_html=True)
     # --- Keywords Tab ---
     with tabs[2]:
     # --- Graphs Tab ---
     with tabs[3]:
+        st.info("اس ٹیب میں فی الحال کوئی گراف شامل نہیں ہے۔")
     # --- Explainability Tab ---
     with tabs[4]:
         st.subheader("LIME Explanations (Word Contributions)")