SEMA / app.py
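"""
SEMA: Semantic Evaluation & Matching Analyzer.

A Gradio app that compares two sentences: it scores their semantic similarity
with Sentence-Transformers embeddings and adds text statistics, named entities,
sentiment, paraphrase suggestions, and a POS-tag distribution chart.
"""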
import gradio as gr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sentence_transformers import SentenceTransformer, util
import torch
import spacy
from transformers import pipeline, AutoModelForSeq2SeqLM, T5Tokenizer
import functools
# Model Caching
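# functools.lru_cache keeps each loaded model in process memory so repeated
# requests do not reload weights from disk. Note that maxsize=1 on
# load_sentence_model means only the most recently requested embedding model
# stays cached; switching models in the UI evicts the previous one.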
@functools.lru_cache(maxsize=1)
def load_sentence_model(name):
    return SentenceTransformer(name)


@functools.lru_cache(maxsize=1)
def load_paraphraser():
    tokenizer = T5Tokenizer.from_pretrained("ramsrigouthamg/t5_paraphraser")
    model = AutoModelForSeq2SeqLM.from_pretrained("ramsrigouthamg/t5_paraphraser")
    return pipeline("text2text-generation", model=model, tokenizer=tokenizer)


@functools.lru_cache(maxsize=1)
def load_sentiment():
    return pipeline("sentiment-analysis")
# Load static models
model = load_sentence_model('all-MiniLM-L6-v2')
nlp = spacy.load("en_core_web_trf")
paraphraser = load_paraphraser()
sentiment = load_sentiment()
# Similarity and Visualization
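# The two sentences are encoded into dense vectors and compared with cosine
# similarity. Cosine similarity can in principle be negative, but for these
# models it is typically in [0, 1], which is the range the plots below assume.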
def get_similarity(sentence1, sentence2, model_name, visualization_type):
    model_local = load_sentence_model(model_name)
    emb1 = model_local.encode(sentence1, convert_to_tensor=True)
    emb2 = model_local.encode(sentence2, convert_to_tensor=True)
    score = util.pytorch_cos_sim(emb1, emb2).item()

    if visualization_type == "Bar Chart":
        fig, ax = plt.subplots(figsize=(6, 4))
        ax.bar(['Similarity'], [score], color='#4CAF50', edgecolor='black')
        ax.set_ylim(0, 1)
        ax.set_ylabel('Cosine Similarity')
        ax.text(0, score + 0.03, f'{score:.2f}', ha='center', fontsize=12, fontweight='bold')
    elif visualization_type == "Gauge":
        fig, ax = plt.subplots(figsize=(5, 3), subplot_kw={'projection': 'polar'})
        theta = np.linspace(0, np.pi, 100)
        ax.plot(theta, [1] * 100, color='lightgray', linewidth=20, alpha=0.5)
        ax.plot(theta[:int(score * 100)], [1] * int(score * 100), color='#2196F3', linewidth=20)
        ax.set_ylim(0, 1.2)
        ax.set_axis_off()
        ax.text(0, 0, f'{score:.2f}', ha='center', va='center', fontsize=18, fontweight='bold')
    else:  # Heatmap
        fig, ax = plt.subplots(figsize=(3, 3))
        cax = ax.imshow([[score]], cmap='coolwarm', vmin=0, vmax=1)
        fig.colorbar(cax, orientation='vertical')
        ax.set_xticks([])
        ax.set_yticks([])
        ax.text(0, 0, f'{score:.2f}', ha='center', va='center', fontsize=18, color='black', fontweight='bold')

    return score, f"Similarity Score: {score:.4f}", fig
# Text Analysis
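# Simple surface-level statistics: word/character counts, the shared lowercase
# tokens, and an overlap rate defined as |shared words| / size of the larger
# unique-word set.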
def analyze_text(sentence1, sentence2):
    s1_words, s2_words = len(sentence1.split()), len(sentence2.split())
    s1_chars, s2_chars = len(sentence1), len(sentence2)
    set1, set2 = set(sentence1.lower().split()), set(sentence2.lower().split())
    common = set1.intersection(set2)
    # Guard against empty input so the overlap rate never divides by zero.
    overlap = len(common) / max(len(set1), len(set2), 1)
    return f"""
## Text Analysis
**Sentence 1:** {s1_words} words, {s1_chars} characters
**Sentence 2:** {s2_words} words, {s2_chars} characters
**Common Words:** {', '.join(common) if common else 'None'}
**Word Overlap Rate:** {overlap:.2f}
"""
# Named Entity Recognition
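# spaCy's en_core_web_trf pipeline supplies both the named entities (doc.ents)
# and the coarse part-of-speech tags (token.pos_) used below.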
def extract_entities(text):
    doc = nlp(text)
    return [(ent.text, ent.label_) for ent in doc.ents]


# POS Tagging
def get_pos_tags(text):
    doc = nlp(text)
    return [(token.text, token.pos_) for token in doc]
def plot_pos_tags(text1, text2):
    doc1 = nlp(text1)
    doc2 = nlp(text2)

    def count_pos(doc):
        counts = {}
        for token in doc:
            counts[token.pos_] = counts.get(token.pos_, 0) + 1
        return counts

    pos_counts1 = count_pos(doc1)
    pos_counts2 = count_pos(doc2)
    # Combine counts from both sentences into a single distribution
    combined_counts = {}
    for tag in set(pos_counts1) | set(pos_counts2):
        combined_counts[tag] = pos_counts1.get(tag, 0) + pos_counts2.get(tag, 0)
    labels = list(combined_counts.keys())
    sizes = list(combined_counts.values())
    # Custom color palette for the pie chart (matplotlib cycles through it if
    # there are more POS tags than colors)
    custom_colors = [
        '#000066',  # deep navy
        '#CCCCFF',  # light lavender
        '#0066CC',  # blue
        '#FF9999',  # light red
        '#660066',  # deep purple
        '#CCFFFF',  # light cyan
        '#FFFFCC',  # light yellow
        '#990033',  # deep rose
        '#9999FF',  # light blue/purple
        '#9966FF',  # violet
        '#CC66CC',  # orchid
    ]
    fig, ax = plt.subplots(figsize=(6, 6))
    ax.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=140, colors=custom_colors[:len(sizes)])
    ax.axis('equal')  # Equal aspect ratio keeps the pie circular
    ax.set_title("Combined POS Tag Distribution")
    return fig
# Paraphrase Detection
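# A fixed cosine-similarity cutoff (0.8 by default) is a simple heuristic for
# paraphrase detection; the threshold is not calibrated per model.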
def detect_paraphrase(score, threshold=0.8):
    return "✅ Likely Paraphrase" if score >= threshold else "❌ Not a Paraphrase"
# Paraphrase Generator
def generate_paraphrases(text):
    try:
        outputs = paraphraser(text, max_length=60, num_return_sequences=2, do_sample=True)
        return [o['generated_text'] for o in outputs]
    except Exception:
        # Return two entries so the results template can always show a pair of suggestions
        return ["Paraphrasing failed or model not loaded."] * 2
# Sentiment
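# pipeline("sentiment-analysis") with no model argument downloads the
# transformers default English sentiment model the first time it is called.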
def get_sentiment(text):
    try:
        return sentiment(text)[0]
    except Exception:
        return {'label': 'Unknown', 'score': 0.0}
# Main processing
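# Builds the full results list for one click. The order of appends must match
# the `outputs` list wired to submit_btn.click at the bottom of this file.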
def process_text(sentence1, sentence2, model_name, visualization_type, perform_analysis, compare_dataset):
    outputs = []
    score, score_text, fig = get_similarity(sentence1, sentence2, model_name, visualization_type)
    outputs.extend([score_text, fig])

    analysis = analyze_text(sentence1, sentence2) if perform_analysis else ""
    outputs.append(analysis)

    paraphrase_result = detect_paraphrase(score)
    outputs.append(paraphrase_result)

    ner1 = extract_entities(sentence1)
    ner2 = extract_entities(sentence2)
    ner_display = f"""
## Named Entities
**Sentence 1:** {', '.join([f'{e[0]} ({e[1]})' for e in ner1]) if ner1 else 'None'}
**Sentence 2:** {', '.join([f'{e[0]} ({e[1]})' for e in ner2]) if ner2 else 'None'}
"""
    outputs.append(ner_display)

    s1_sentiment = get_sentiment(sentence1)
    s2_sentiment = get_sentiment(sentence2)
    senti_display = f"""
## Sentiment Analysis
**Sentence 1:** {s1_sentiment['label']} (score: {s1_sentiment['score']:.2f})
**Sentence 2:** {s2_sentiment['label']} (score: {s2_sentiment['score']:.2f})
"""
    outputs.append(senti_display)

    para1 = generate_paraphrases(sentence1)
    para2 = generate_paraphrases(sentence2)
    para_text = f"""
## Paraphrase Suggestions
**Sentence 1:**
- {para1[0]}
- {para1[1]}
**Sentence 2:**
- {para2[0]}
- {para2[1]}
"""
    outputs.append(para_text)

    # POS Tagging
    pos1 = get_pos_tags(sentence1)
    pos2 = get_pos_tags(sentence2)
    pos_text = f"""
## Part-of-Speech (POS) Tags
**Sentence 1:**
{', '.join([f"{word} ({pos})" for word, pos in pos1])}
**Sentence 2:**
{', '.join([f"{word} ({pos})" for word, pos in pos2])}
"""
    outputs.append(pos_text)
    outputs.append(plot_pos_tags(sentence1, sentence2))

    outputs.append("✅ Your input has been submitted! Please check the 📊 Results tab.")
    return outputs
# Models
models = [
    'all-MiniLM-L6-v2',
    'paraphrase-multilingual-MiniLM-L12-v2',
    'paraphrase-MiniLM-L3-v2',
    'distilbert-base-nli-mean-tokens',
]
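# All of the above are model names resolvable by SentenceTransformer(); the
# first entry is used as the dropdown default.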
# Gradio UI
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🧪 SEMA: Semantic Evaluation & Matching Analyzer")
    gr.Markdown("Explore sentence meaning, similarity, and more.")

    with gr.Tabs():
        with gr.Tab("📝 Input"):
            sentence1 = gr.Textbox(label="Sentence 1", lines=4)
            sentence2 = gr.Textbox(label="Sentence 2", lines=4)
            model_name = gr.Dropdown(choices=models, value=models[0], label="Model")
            visualization_type = gr.Radio(["Bar Chart", "Gauge", "Heatmap"], value="Gauge", label="Visualization")
            perform_analysis = gr.Checkbox(label="Extra Text Analysis", value=True)
            compare_dataset = gr.Checkbox(label="Compare with Dataset", value=False)
            submit_btn = gr.Button("Run Analysis")
            status_msg = gr.Textbox(label="Status", interactive=False)

        with gr.Tab("📊 Results"):
            sim_result = gr.Textbox(label="Similarity Score", interactive=False)
            vis_output = gr.Plot(label="Visualization")
            para_result = gr.Textbox(label="Paraphrase Detection", interactive=False)

        with gr.Tab("🔬 Deep Insights"):
            with gr.Accordion("📚 Text Statistics", open=True):
                stats_output = gr.Markdown()
            with gr.Accordion("🧠 Named Entity Recognition", open=False):
                ner_output = gr.Markdown()
            with gr.Accordion("💬 Sentiment Analysis", open=False):
                sentiment_output = gr.Markdown()
            with gr.Accordion("🌀 Paraphrase Suggestions", open=False):
                para_output = gr.Markdown()
            with gr.Accordion("🧾 POS Tagging", open=False):
                pos_output = gr.Markdown()
                pos_plot_output = gr.Plot()

    gr.Examples([
        ["The sky is blue.", "The sky has a beautiful blue color."],
        ["What is your name?", "Can you tell me your name?"]
    ], inputs=[sentence1, sentence2])

    submit_btn.click(
        fn=process_text,
        inputs=[sentence1, sentence2, model_name, visualization_type, perform_analysis, compare_dataset],
        outputs=[
            sim_result,
            vis_output,
            stats_output,
            para_result,
            ner_output,
            sentiment_output,
            para_output,
            pos_output,
            pos_plot_output,
            status_msg
        ]
    )

demo.launch()