adityasync committed on
Commit ad94382 · verified · 1 Parent(s): 3f62666

Create app.py

Files changed (1)
  1. app.py +282 -0
app.py ADDED
@@ -0,0 +1,282 @@
+ import gradio as gr
+ import pandas as pd
+ import numpy as np
+ import matplotlib.pyplot as plt
+ from sentence_transformers import SentenceTransformer, util
+ import torch
+ import spacy
+ from transformers import pipeline, AutoModelForSeq2SeqLM, T5Tokenizer
+ import functools
+
+ # Model Caching
+ @functools.lru_cache(maxsize=1)
+ def load_sentence_model(name):
+     return SentenceTransformer(name)
+
+ @functools.lru_cache(maxsize=1)
+ def load_paraphraser():
+     tokenizer = T5Tokenizer.from_pretrained("ramsrigouthamg/t5_paraphraser")
+     model = AutoModelForSeq2SeqLM.from_pretrained("ramsrigouthamg/t5_paraphraser")
+     return pipeline("text2text-generation", model=model, tokenizer=tokenizer)
+
+ @functools.lru_cache(maxsize=1)
+ def load_sentiment():
+     return pipeline("sentiment-analysis")
+
+ # Load static models
+ model = load_sentence_model('all-MiniLM-L6-v2')
+ nlp = spacy.load("en_core_web_trf")
+ paraphraser = load_paraphraser()
+ sentiment = load_sentiment()
+
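+ # Note: SentenceTransformer and the transformers pipelines above download their
+ # weights from the Hugging Face Hub on first use, and the spaCy model
+ # "en_core_web_trf" must be installed separately (python -m spacy download en_core_web_trf).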
+ # Similarity and Visualization
+ def get_similarity(sentence1, sentence2, model_name, visualization_type):
+     model_local = load_sentence_model(model_name)
+     emb1 = model_local.encode(sentence1, convert_to_tensor=True)
+     emb2 = model_local.encode(sentence2, convert_to_tensor=True)
+     score = util.pytorch_cos_sim(emb1, emb2).item()
+
+     if visualization_type == "Bar Chart":
+         fig, ax = plt.subplots(figsize=(6, 4))
+         ax.bar(['Similarity'], [score], color='#4CAF50', edgecolor='black')
+         ax.set_ylim(0, 1)
+         ax.set_ylabel('Cosine Similarity')
+         ax.text(0, score + 0.03, f'{score:.2f}', ha='center', fontsize=12, fontweight='bold')
+
+     elif visualization_type == "Gauge":
+         fig, ax = plt.subplots(figsize=(5, 3), subplot_kw={'projection': 'polar'})
+         theta = np.linspace(0, np.pi, 100)
+         ax.plot(theta, [1] * 100, color='lightgray', linewidth=20, alpha=0.5)
+         ax.plot(theta[:int(score * 100)], [1] * int(score * 100), color='#2196F3', linewidth=20)
+         ax.set_ylim(0, 1.2)
+         ax.set_axis_off()
+         ax.text(0, 0, f'{score:.2f}', ha='center', va='center', fontsize=18, fontweight='bold')
+
+     else: # Heatmap
+         fig, ax = plt.subplots(figsize=(3, 3))
+         cax = ax.imshow([[score]], cmap='coolwarm', vmin=0, vmax=1)
+         fig.colorbar(cax, orientation='vertical')
+         ax.set_xticks([]); ax.set_yticks([])
+         ax.text(0, 0, f'{score:.2f}', ha='center', va='center', fontsize=18, color='black', fontweight='bold')
+
+     return score, f"Similarity Score: {score:.4f}", fig
+
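+ # Example (illustrative): get_similarity("The sky is blue.", "The sky has a beautiful blue color.",
+ # 'all-MiniLM-L6-v2', "Gauge") returns the raw cosine score, a formatted score string,
+ # and a matplotlib figure for the chosen visualization.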
+ # Text Analysis
+ def analyze_text(sentence1, sentence2):
+     s1_words, s2_words = len(sentence1.split()), len(sentence2.split())
+     s1_chars, s2_chars = len(sentence1), len(sentence2)
+     common = set(sentence1.lower().split()).intersection(set(sentence2.lower().split()))
+     # The extra 1 keeps the ratio defined when both inputs are empty.
+     overlap = len(common) / max(len(set(sentence1.lower().split())), len(set(sentence2.lower().split())), 1)
+     return f"""
+ ## Text Analysis
+ **Sentence 1:** {s1_words} words, {s1_chars} characters
+ **Sentence 2:** {s2_words} words, {s2_chars} characters
+ **Common Words:** {', '.join(common) if common else 'None'}
+ **Word Overlap Rate:** {overlap:.2f}
+ """
+
+ # Named Entity Recognition
+ def extract_entities(text):
+     doc = nlp(text)
+     return [(ent.text, ent.label_) for ent in doc.ents]
+
+ # POS Tagging
+ def get_pos_tags(text):
+     doc = nlp(text)
+     return [(token.text, token.pos_) for token in doc]
+
+ def plot_pos_tags(text1, text2):
+     doc1 = nlp(text1)
+     doc2 = nlp(text2)
+
+     def count_pos(doc):
+         counts = {}
+         for token in doc:
+             counts[token.pos_] = counts.get(token.pos_, 0) + 1
+         return counts
+
+     pos_counts1 = count_pos(doc1)
+     pos_counts2 = count_pos(doc2)
+
+     # Combine counts for pie chart
+     combined_counts = {}
+     for tag in set(pos_counts1) | set(pos_counts2):
+         combined_counts[tag] = pos_counts1.get(tag, 0) + pos_counts2.get(tag, 0)
+
+     labels = list(combined_counts.keys())
+     sizes = list(combined_counts.values())
+
+     # Custom color palette for the pie chart slices
+     custom_colors = [
+         '#000066',  # Deep navy (N_SING)
+         '#CCCCFF',  # Light lavender (P)
+         '#0066CC',  # Blue (DELM)
+         '#FF9999',  # Light red (ADJ_SIM)
+         '#660066',  # Deep purple (CON)
+         '#CCFFFF',  # Light cyan (N_PL)
+         '#FFFFCC',  # Light yellow (V_PA)
+         '#990033',  # Deep rose (PRO)
+         '#9999FF',  # Light blue/purple (ETC)
+         '#9966FF',  # Extra if needed
+         '#CC66CC'   # Extra if needed
+     ]
+
+     fig, ax = plt.subplots(figsize=(6, 6))
+     ax.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=140, colors=custom_colors[:len(sizes)])
+     ax.axis('equal')  # Equal aspect ratio makes the pie circular.
+     ax.set_title("Combined POS Tag Distribution")
+
+     return fig
+
+
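+ # Note: spaCy's token.pos_ values are Universal POS tags (NOUN, VERB, ADJ, ...),
+ # so those are the labels that actually show up as pie slices above.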
+ # Paraphrase Detection
+ def detect_paraphrase(score, threshold=0.8):
+     return "✅ Likely Paraphrase" if score >= threshold else "❌ Not a Paraphrase"
+
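+ # Note: the default 0.8 cutoff is a heuristic threshold on the cosine-similarity
+ # score; lowering it makes the paraphrase check more permissive.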
+ # Paraphrase Generator
+ def generate_paraphrases(text):
+     try:
+         outputs = paraphraser(text, max_length=60, num_return_sequences=2, do_sample=True)
+         return [o['generated_text'] for o in outputs]
+     except Exception:
+         # Return two entries so callers can safely index [0] and [1].
+         return ["Paraphrasing failed or model not loaded."] * 2
+
+ # Sentiment
+ def get_sentiment(text):
+     try:
+         return sentiment(text)[0]
+     except Exception:
+         return {'label': 'Unknown', 'score': 0.0}
+
+ # Main processing
+ def process_text(sentence1, sentence2, model_name, visualization_type, perform_analysis, compare_dataset):
+     outputs = []
+
+     score, score_text, fig = get_similarity(sentence1, sentence2, model_name, visualization_type)
+     outputs.extend([score_text, fig])
+
+     analysis = analyze_text(sentence1, sentence2) if perform_analysis else ""
+     outputs.append(analysis)
+
+     paraphrase_result = detect_paraphrase(score)
+     outputs.append(paraphrase_result)
+
+     ner1 = extract_entities(sentence1)
+     ner2 = extract_entities(sentence2)
+     ner_display = f"""
+ ## Named Entities
+
+ **Sentence 1:** {', '.join([f'{e[0]} ({e[1]})' for e in ner1]) if ner1 else 'None'}
+ **Sentence 2:** {', '.join([f'{e[0]} ({e[1]})' for e in ner2]) if ner2 else 'None'}
+ """
+     outputs.append(ner_display)
+
+     s1_sentiment = get_sentiment(sentence1)
+     s2_sentiment = get_sentiment(sentence2)
+     senti_display = f"""
+ ## Sentiment Analysis
+
+ **Sentence 1:** {s1_sentiment['label']} (score: {s1_sentiment['score']:.2f})
+ **Sentence 2:** {s2_sentiment['label']} (score: {s2_sentiment['score']:.2f})
+ """
+     outputs.append(senti_display)
+
+     para1 = generate_paraphrases(sentence1)
+     para2 = generate_paraphrases(sentence2)
+     para_text = f"""
+ ## Paraphrase Suggestions
+
+ **Sentence 1:**
+ - {para1[0]}
+ - {para1[1]}
+
+ **Sentence 2:**
+ - {para2[0]}
+ - {para2[1]}
+ """
+     outputs.append(para_text)
+
+     # POS Tagging
+     pos1 = get_pos_tags(sentence1)
+     pos2 = get_pos_tags(sentence2)
+     pos_text = f"""
+ ## Part-of-Speech (POS) Tags
+
+ **Sentence 1:**
+ {', '.join([f"{word} ({pos})" for word, pos in pos1])}
+
+ **Sentence 2:**
+ {', '.join([f"{word} ({pos})" for word, pos in pos2])}
+ """
+     outputs.append(pos_text)
+     outputs.append(plot_pos_tags(sentence1, sentence2))
+
+     outputs.append("✅ Your input has been submitted! Please check the 📊 Results tab.")
+     return outputs
+
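+ # Note: process_text returns ten values in a fixed order; the outputs list wired up
+ # in submit_btn.click() below mirrors that order component-for-component.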
+ # Models
+ models = [
+     'all-MiniLM-L6-v2',
+     'paraphrase-multilingual-MiniLM-L12-v2',
+     'paraphrase-MiniLM-L3-v2',
+     'distilbert-base-nli-mean-tokens'
+ ]
+
+ # Gradio UI
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
+     gr.Markdown("# 🧪 SEMA: Semantic Evaluation & Matching Analyzer")
+     gr.Markdown("Explore sentence meaning, similarity, and more.")
+
+     with gr.Tabs():
+         with gr.Tab("📝 Input"):
+             sentence1 = gr.Textbox(label="Sentence 1", lines=4)
+             sentence2 = gr.Textbox(label="Sentence 2", lines=4)
+             model_name = gr.Dropdown(choices=models, value=models[0], label="Model")
+             visualization_type = gr.Radio(["Bar Chart", "Gauge", "Heatmap"], value="Gauge", label="Visualization")
+             perform_analysis = gr.Checkbox(label="Extra Text Analysis", value=True)
+             compare_dataset = gr.Checkbox(label="Compare with Dataset", value=False)
+             submit_btn = gr.Button("Run Analysis")
+             status_msg = gr.Textbox(label="Status", interactive=False)
+
+         with gr.Tab("📊 Results"):
+             sim_result = gr.Textbox(label="Similarity Score", interactive=False)
+             vis_output = gr.Plot(label="Visualization")
+             para_result = gr.Textbox(label="Paraphrase Detection", interactive=False)
+
+         with gr.Tab("🔬 Deep Insights"):
+             with gr.Accordion("📚 Text Statistics", open=True):
+                 stats_output = gr.Markdown()
+             with gr.Accordion("🧠 Named Entity Recognition", open=False):
+                 ner_output = gr.Markdown()
+             with gr.Accordion("💬 Sentiment Analysis", open=False):
+                 sentiment_output = gr.Markdown()
+             with gr.Accordion("🌀 Paraphrase Suggestions", open=False):
+                 para_output = gr.Markdown()
+             with gr.Accordion("🧾 POS Tagging", open=False):
+                 pos_output = gr.Markdown()
+                 pos_plot_output = gr.Plot()
+
+     gr.Examples([
+         ["The sky is blue.", "The sky has a beautiful blue color."],
+         ["What is your name?", "Can you tell me your name?"]
+     ], inputs=[sentence1, sentence2])
+
+     submit_btn.click(
+         fn=process_text,
+         inputs=[sentence1, sentence2, model_name, visualization_type, perform_analysis, compare_dataset],
+         outputs=[
+             sim_result,
+             vis_output,
+             stats_output,
+             para_result,
+             ner_output,
+             sentiment_output,
+             para_output,
+             pos_output,
+             pos_plot_output,
+             status_msg
+         ]
+     )
+
+ demo.launch()
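+ # To run locally (assuming the dependencies above are installed), `python app.py`
+ # starts the Gradio server; on Hugging Face Spaces this app.py is picked up automatically.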