"""SEMA: a Gradio app that compares two sentences.

For a pair of sentences the app reports embedding cosine similarity (with a
chart), simple text statistics, named entities, sentiment, generated
paraphrases and part-of-speech tags.
"""

import functools
import logging
from collections import Counter

import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import spacy
import torch
from sentence_transformers import SentenceTransformer, util
from transformers import AutoModelForSeq2SeqLM, T5Tokenizer, pipeline

logger = logging.getLogger(__name__)


# Model Caching
@functools.lru_cache(maxsize=1)
def load_sentence_model(name):
    """Return the SentenceTransformer called *name*.

    Only the most recently requested model is kept (``maxsize=1``), so
    asking for a different name evicts and reloads.
    """
    return SentenceTransformer(name)


@functools.lru_cache(maxsize=1)
def load_paraphraser():
    """Return a cached text2text-generation pipeline for the T5 paraphraser."""
    tokenizer = T5Tokenizer.from_pretrained("ramsrigouthamg/t5_paraphraser")
    seq2seq = AutoModelForSeq2SeqLM.from_pretrained("ramsrigouthamg/t5_paraphraser")
    return pipeline("text2text-generation", model=seq2seq, tokenizer=tokenizer)


@functools.lru_cache(maxsize=1)
def load_sentiment():
    """Return a cached sentiment-analysis pipeline (library default model)."""
    return pipeline("sentiment-analysis")


# Load static models
model = load_sentence_model('all-MiniLM-L6-v2')
nlp = spacy.load("en_core_web_trf")
paraphraser = load_paraphraser()
sentiment = load_sentiment()


@functools.lru_cache(maxsize=4)
def _parse(text):
    """Run the spaCy pipeline on *text*, remembering the last few results.

    One request needs entities, POS tags and the POS chart for the same
    sentences; caching avoids running the pipeline three times per sentence.
    """
    return nlp(text)


def _md_section(title, *paragraphs):
    """Build a Markdown section: an H2 heading followed by paragraphs."""
    return "\n\n".join([f"## {title}", *paragraphs])


# Similarity and Visualization
def _draw_bar(score, level):
    """Draw *level* as a single bar, labelled with the raw *score*."""
    fig, ax = plt.subplots(figsize=(6, 4))
    ax.bar(['Similarity'], [level], color='#4CAF50', edgecolor='black')
    ax.set_ylim(0, 1)
    ax.set_ylabel('Cosine Similarity')
    ax.text(0, level + 0.03, f'{score:.2f}', ha='center', fontsize=12, fontweight='bold')
    return fig


def _draw_gauge(score, level):
    """Draw *level* as the filled part of a half-circle gauge."""
    fig, ax = plt.subplots(figsize=(5, 3), subplot_kw={'projection': 'polar'})
    theta = np.linspace(0, np.pi, 100)
    ax.plot(theta, [1] * 100, color='lightgray', linewidth=20, alpha=0.5)
    # `level` is already within [0, 1], so the slice and the radius list
    # always have the same length (a negative count would desynchronise them).
    filled = int(level * 100)
    ax.plot(theta[:filled], [1] * filled, color='#2196F3', linewidth=20)
    ax.set_ylim(0, 1.2)
    ax.set_axis_off()
    ax.text(0, 0, f'{score:.2f}', ha='center', va='center', fontsize=18, fontweight='bold')
    return fig


def _draw_heatmap(score, level):
    """Draw the score as a single coloured cell with a colorbar."""
    fig, ax = plt.subplots(figsize=(3, 3))
    cax = ax.imshow([[score]], cmap='coolwarm', vmin=0, vmax=1)
    fig.colorbar(cax, orientation='vertical')
    ax.set_xticks([])
    ax.set_yticks([])
    ax.text(0, 0, f'{score:.2f}', ha='center', va='center', fontsize=18, color='black', fontweight='bold')
    return fig


def get_similarity(sentence1, sentence2, model_name, visualization_type):
    """Compute cosine similarity of two sentences and chart it.

    Args:
        sentence1: First sentence.
        sentence2: Second sentence.
        model_name: SentenceTransformer model used for the embeddings.
        visualization_type: "Bar Chart" or "Gauge"; any other value
            produces the heatmap.

    Returns:
        A tuple ``(score, label, fig)``: the raw cosine similarity, a
        display string with four decimals, and the matplotlib figure.
    """
    encoder = load_sentence_model(model_name)
    emb1 = encoder.encode(sentence1, convert_to_tensor=True)
    emb2 = encoder.encode(sentence2, convert_to_tensor=True)
    score = util.pytorch_cos_sim(emb1, emb2).item()

    # Cosine similarity can be negative, but every chart uses a 0..1 scale.
    # Draw with a clamped value and keep the raw score for the text labels.
    level = min(max(score, 0.0), 1.0)

    if visualization_type == "Bar Chart":
        fig = _draw_bar(score, level)
    elif visualization_type == "Gauge":
        fig = _draw_gauge(score, level)
    else:  # Heatmap
        fig = _draw_heatmap(score, level)

    # pyplot keeps a reference to every figure it creates; drop it so figures
    # do not accumulate across requests. The Figure object stays renderable.
    plt.close(fig)
    return score, f"Similarity Score: {score:.4f}", fig


# Text Analysis
def analyze_text(sentence1, sentence2):
    """Return a Markdown summary of word/character counts and word overlap.

    Overlap is the number of shared lower-cased, whitespace-separated words
    divided by the larger of the two unique-word counts; it is 0 when
    neither sentence contains a word.
    """
    s1_words, s2_words = len(sentence1.split()), len(sentence2.split())
    s1_chars, s2_chars = len(sentence1), len(sentence2)

    vocab1 = set(sentence1.lower().split())
    vocab2 = set(sentence2.lower().split())
    common = vocab1 & vocab2
    largest = max(len(vocab1), len(vocab2))
    # Guard the empty/whitespace-only case, which used to divide by zero.
    overlap = len(common) / largest if largest else 0.0

    # Sorted so the listing is stable instead of following set order.
    common_text = ', '.join(sorted(common)) if common else 'None'
    return _md_section(
        "Text Analysis",
        f"**Sentence 1:** {s1_words} words, {s1_chars} characters",
        f"**Sentence 2:** {s2_words} words, {s2_chars} characters",
        f"**Common Words:** {common_text}",
        f"**Word Overlap Rate:** {overlap:.2f}",
    )


# Named Entity Recognition
def extract_entities(text):
    """Return ``(entity_text, entity_label)`` pairs found by spaCy."""
    return [(ent.text, ent.label_) for ent in _parse(text).ents]


# POS Tagging
def get_pos_tags(text):
    """Return ``(token_text, pos_tag)`` pairs for every token in *text*."""
    return [(token.text, token.pos_) for token in _parse(text)]


def plot_pos_tags(text1, text2):
    """Return a pie chart of POS tag counts summed over both texts."""
    # Combine counts for pie chart
    combined_counts = Counter(token.pos_ for token in _parse(text1))
    combined_counts.update(token.pos_ for token in _parse(text2))

    # Sorted so a given tag set always yields the same slice/colour order.
    labels = sorted(combined_counts)
    sizes = [combined_counts[tag] for tag in labels]

    # Palette sampled from a reference pie chart. Colours are assigned by
    # slice position, not by tag.
    custom_colors = [
        '#000066',  # Deep navy
        '#CCCCFF',  # Light lavender
        '#0066CC',  # Blue
        '#FF9999',  # Light red
        '#660066',  # Deep purple
        '#CCFFFF',  # Light cyan
        '#FFFFCC',  # Light yellow
        '#990033',  # Deep rose
        '#9999FF',  # Light blue/purple
        '#9966FF',  # Extra if needed
        '#CC66CC',  # Extra if needed
    ]

    fig, ax = plt.subplots(figsize=(6, 6))
    if sizes:
        ax.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=140,
               colors=custom_colors[:len(sizes)])
        ax.axis('equal')  # Equal aspect ratio makes the pie circular.
    else:
        # Nothing to chart when both texts are empty.
        ax.text(0.5, 0.5, 'No tokens', ha='center', va='center')
        ax.set_axis_off()
    ax.set_title("Combined POS Tag Distribution")
    plt.close(fig)  # release pyplot's reference; the figure stays renderable
    return fig


# Paraphrase Detection
def detect_paraphrase(score, threshold=0.8):
    """Label *score* as a likely paraphrase when it reaches *threshold*."""
    return "✅ Likely Paraphrase" if score >= threshold else "❌ Not a Paraphrase"


# Paraphrase Generator
def generate_paraphrases(text):
    """Return sampled paraphrases of *text* (two are requested).

    On any failure a single-item list with an error message is returned
    instead of raising, so callers must not assume two items.
    """
    try:
        outputs = paraphraser(text, max_length=60, num_return_sequences=2, do_sample=True)
        return [o['generated_text'] for o in outputs]
    except Exception:
        logger.exception("Paraphrase generation failed")
        return ["Paraphrasing failed or model not loaded."]


# Sentiment
def get_sentiment(text):
    """Return the first sentiment result (a dict with 'label' and 'score').

    Falls back to ``{'label': 'Unknown', 'score': 0.0}`` on failure.
    """
    try:
        return sentiment(text)[0]
    except Exception:
        logger.exception("Sentiment analysis failed")
        return {'label': 'Unknown', 'score': 0.0}


def _bullets(items):
    """Render *items* as a Markdown bullet list."""
    return "\n".join(f"- {item}" for item in items)


# Main processing
def process_text(sentence1, sentence2, model_name, visualization_type, perform_analysis, compare_dataset):
    """Run every analysis for the two sentences.

    Args:
        sentence1: First sentence.
        sentence2: Second sentence.
        model_name: SentenceTransformer model used for similarity.
        visualization_type: Chart style passed to ``get_similarity``.
        perform_analysis: When false, the text statistics are left blank.
        compare_dataset: Accepted from the UI checkbox; currently unused.

    Returns:
        A list of ten values, in the order of the ``outputs`` wired to the
        submit button: similarity text, similarity figure, text statistics,
        paraphrase verdict, entities, sentiment, paraphrase suggestions,
        POS tags, POS figure, status message.
    """
    outputs = []

    score, score_text, fig = get_similarity(sentence1, sentence2, model_name, visualization_type)
    outputs.extend([score_text, fig])

    outputs.append(analyze_text(sentence1, sentence2) if perform_analysis else "")
    outputs.append(detect_paraphrase(score))

    def entity_list(entities):
        return ', '.join(f'{text} ({label})' for text, label in entities) if entities else 'None'

    outputs.append(_md_section(
        "Named Entities",
        f"**Sentence 1:** {entity_list(extract_entities(sentence1))}",
        f"**Sentence 2:** {entity_list(extract_entities(sentence2))}",
    ))

    s1_sentiment = get_sentiment(sentence1)
    s2_sentiment = get_sentiment(sentence2)
    outputs.append(_md_section(
        "Sentiment Analysis",
        f"**Sentence 1:** {s1_sentiment['label']} (score: {s1_sentiment['score']:.2f})",
        f"**Sentence 2:** {s2_sentiment['label']} (score: {s2_sentiment['score']:.2f})",
    ))

    # List whatever came back: the failure fallback has only one item, so
    # indexing [0] and [1] directly would raise IndexError.
    outputs.append(_md_section(
        "Paraphrase Suggestions",
        "**Sentence 1:**",
        _bullets(generate_paraphrases(sentence1)),
        "**Sentence 2:**",
        _bullets(generate_paraphrases(sentence2)),
    ))

    # POS Tagging
    def tag_list(pairs):
        return ', '.join(f"{word} ({pos})" for word, pos in pairs)

    outputs.append(_md_section(
        "Part-of-Speech (POS) Tags",
        f"**Sentence 1:** {tag_list(get_pos_tags(sentence1))}",
        f"**Sentence 2:** {tag_list(get_pos_tags(sentence2))}",
    ))
    outputs.append(plot_pos_tags(sentence1, sentence2))

    outputs.append("✅ Your input has been submitted! Please check the 📊 Results tab.")
    return outputs


# Models
models = [
    'all-MiniLM-L6-v2',
    'paraphrase-multilingual-MiniLM-L12-v2',
    'paraphrase-MiniLM-L3-v2',
    'distilbert-base-nli-mean-tokens',
]

# Gradio UI
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🧪 SEMA: Semantic Evaluation & Matching Analyzer")
    gr.Markdown("Explore sentence meaning, similarity, and more.")

    with gr.Tabs():
        with gr.Tab("📝 Input"):
            sentence1 = gr.Textbox(label="Sentence 1", lines=4)
            sentence2 = gr.Textbox(label="Sentence 2", lines=4)
            model_name = gr.Dropdown(choices=models, value=models[0], label="Model")
            visualization_type = gr.Radio(["Bar Chart", "Gauge", "Heatmap"], value="Gauge", label="Visualization")
            perform_analysis = gr.Checkbox(label="Extra Text Analysis", value=True)
            compare_dataset = gr.Checkbox(label="Compare with Dataset", value=False)
            submit_btn = gr.Button("Run Analysis")
            status_msg = gr.Textbox(label="Status", interactive=False)

        with gr.Tab("📊 Results"):
            sim_result = gr.Textbox(label="Similarity Score", interactive=False)
            vis_output = gr.Plot(label="Visualization")
            para_result = gr.Textbox(label="Paraphrase Detection", interactive=False)

        with gr.Tab("🔬 Deep Insights"):
            with gr.Accordion("📚 Text Statistics", open=True):
                stats_output = gr.Markdown()
            with gr.Accordion("🧠 Named Entity Recognition", open=False):
                ner_output = gr.Markdown()
            with gr.Accordion("💬 Sentiment Analysis", open=False):
                sentiment_output = gr.Markdown()
            with gr.Accordion("🌀 Paraphrase Suggestions", open=False):
                para_output = gr.Markdown()
            with gr.Accordion("🧾 POS Tagging", open=False):
                pos_output = gr.Markdown()
                pos_plot_output = gr.Plot()

    gr.Examples(
        [
            ["The sky is blue.", "The sky has a beautiful blue color."],
            ["What is your name?", "Can you tell me your name?"],
        ],
        inputs=[sentence1, sentence2],
    )

    # The order of `outputs` must match the list returned by process_text.
    submit_btn.click(
        fn=process_text,
        inputs=[sentence1, sentence2, model_name, visualization_type, perform_analysis, compare_dataset],
        outputs=[
            sim_result,
            vis_output,
            stats_output,
            para_result,
            ner_output,
            sentiment_output,
            para_output,
            pos_output,
            pos_plot_output,
            status_msg,
        ],
    )

demo.launch()