import gradio as gr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sentence_transformers import SentenceTransformer, util
import torch
import spacy
from transformers import pipeline, AutoModelForSeq2SeqLM, T5Tokenizer
import functools
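# SEMA (Semantic Evaluation & Matching Analyzer): a Gradio app that scores the
# semantic similarity of two sentences with sentence-transformers embeddings and
# adds NER, POS tagging, sentiment, and paraphrase suggestions around that score.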
# Model Caching
@functools.lru_cache(maxsize=None)
def load_sentence_model(name):
    # Cache each SentenceTransformer by name so switching models in the UI
    # does not re-initialise a model that was already loaded.
    return SentenceTransformer(name)

@functools.lru_cache(maxsize=None)
def load_paraphraser():
    tokenizer = T5Tokenizer.from_pretrained("ramsrigouthamg/t5_paraphraser")
    model = AutoModelForSeq2SeqLM.from_pretrained("ramsrigouthamg/t5_paraphraser")
    return pipeline("text2text-generation", model=model, tokenizer=tokenizer)

@functools.lru_cache(maxsize=None)
def load_sentiment():
    return pipeline("sentiment-analysis")
# Load static models
model = load_sentence_model('all-MiniLM-L6-v2')
nlp = spacy.load("en_core_web_trf")
paraphraser = load_paraphraser()
sentiment = load_sentiment()
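# The default sentence model is warmed up here and reused from the cache by
# get_similarity(). "en_core_web_trf" must already be installed in the
# environment; spaCy does not download pipeline packages automatically.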
# Similarity and Visualization
def get_similarity(sentence1, sentence2, model_name, visualization_type):
    model_local = load_sentence_model(model_name)
    emb1 = model_local.encode(sentence1, convert_to_tensor=True)
    emb2 = model_local.encode(sentence2, convert_to_tensor=True)
    score = util.pytorch_cos_sim(emb1, emb2).item()
    # Cosine similarity can fall slightly outside [0, 1]; clamp a copy for plotting only.
    plot_score = float(np.clip(score, 0.0, 1.0))
    if visualization_type == "Bar Chart":
        fig, ax = plt.subplots(figsize=(6, 4))
        ax.bar(['Similarity'], [plot_score], color='#4CAF50', edgecolor='black')
        ax.set_ylim(0, 1)
        ax.set_ylabel('Cosine Similarity')
        ax.text(0, plot_score + 0.03, f'{score:.2f}', ha='center', fontsize=12, fontweight='bold')
    elif visualization_type == "Gauge":
        fig, ax = plt.subplots(figsize=(5, 3), subplot_kw={'projection': 'polar'})
        theta = np.linspace(0, np.pi, 100)
        ax.plot(theta, [1] * 100, color='lightgray', linewidth=20, alpha=0.5)
        ax.plot(theta[:int(plot_score * 100)], [1] * int(plot_score * 100), color='#2196F3', linewidth=20)
        ax.set_ylim(0, 1.2)
        ax.set_axis_off()
        ax.text(0, 0, f'{score:.2f}', ha='center', va='center', fontsize=18, fontweight='bold')
    else:  # Heatmap
        fig, ax = plt.subplots(figsize=(3, 3))
        cax = ax.imshow([[plot_score]], cmap='coolwarm', vmin=0, vmax=1)
        fig.colorbar(cax, orientation='vertical')
        ax.set_xticks([]); ax.set_yticks([])
        ax.text(0, 0, f'{score:.2f}', ha='center', va='center', fontsize=18, color='black', fontweight='bold')
    return score, f"Similarity Score: {score:.4f}", fig
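# get_similarity() returns the raw (unclamped) score so downstream paraphrase
# detection sees the true cosine value, plus a formatted string and the figure
# that are passed straight through to the Gradio outputs.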
# Text Analysis
def analyze_text(sentence1, sentence2):
    s1_words, s2_words = len(sentence1.split()), len(sentence2.split())
    s1_chars, s2_chars = len(sentence1), len(sentence2)
    s1_set, s2_set = set(sentence1.lower().split()), set(sentence2.lower().split())
    common = s1_set.intersection(s2_set)
    # Guard against empty inputs so the ratio never divides by zero.
    overlap = len(common) / max(len(s1_set), len(s2_set), 1)
    return f"""
## Text Analysis
**Sentence 1:** {s1_words} words, {s1_chars} characters

**Sentence 2:** {s2_words} words, {s2_chars} characters

**Common Words:** {', '.join(common) if common else 'None'}

**Word Overlap Rate:** {overlap:.2f}
"""
# Named Entity Recognition
def extract_entities(text):
    doc = nlp(text)
    return [(ent.text, ent.label_) for ent in doc.ents]

# POS Tagging
def get_pos_tags(text):
    doc = nlp(text)
    return [(token.text, token.pos_) for token in doc]
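# Pie chart of the combined coarse-grained POS tag counts across both sentences.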
def plot_pos_tags(text1, text2):
    doc1 = nlp(text1)
    doc2 = nlp(text2)

    def count_pos(doc):
        counts = {}
        for token in doc:
            counts[token.pos_] = counts.get(token.pos_, 0) + 1
        return counts

    pos_counts1 = count_pos(doc1)
    pos_counts2 = count_pos(doc2)
    # Combine counts for the pie chart
    combined_counts = {}
    for tag in set(pos_counts1) | set(pos_counts2):
        combined_counts[tag] = pos_counts1.get(tag, 0) + pos_counts2.get(tag, 0)
    labels = list(combined_counts.keys())
    sizes = list(combined_counts.values())
    # Fixed palette; colours are assigned to tags in whatever order they appear.
    custom_colors = [
        '#000066',  # deep navy
        '#CCCCFF',  # light lavender
        '#0066CC',  # blue
        '#FF9999',  # light red
        '#660066',  # deep purple
        '#CCFFFF',  # light cyan
        '#FFFFCC',  # light yellow
        '#990033',  # deep rose
        '#9999FF',  # light blue/purple
        '#9966FF',  # extra, if more tags appear
        '#CC66CC'   # extra, if more tags appear
    ]
    fig, ax = plt.subplots(figsize=(6, 6))
    ax.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=140, colors=custom_colors[:len(sizes)])
    ax.axis('equal')  # Equal aspect ratio makes the pie circular.
    ax.set_title("Combined POS Tag Distribution")
    return fig
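# The 0.8 threshold used below is a heuristic cutoff; it is not calibrated per
# embedding model.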
# Paraphrase Detection
def detect_paraphrase(score, threshold=0.8):
    return "✅ Likely Paraphrase" if score >= threshold else "❌ Not a Paraphrase"

# Paraphrase Generator
def generate_paraphrases(text):
    try:
        outputs = paraphraser(text, max_length=60, num_return_sequences=2, do_sample=True)
        return [o['generated_text'] for o in outputs]
    except Exception:
        # Always return two entries so the results template can index both.
        return ["Paraphrasing failed or model not loaded."] * 2
# Sentiment
def get_sentiment(text):
    try:
        return sentiment(text)[0]
    except Exception:
        return {'label': 'Unknown', 'score': 0.0}
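# The default "sentiment-analysis" pipeline yields dicts with 'label' and
# 'score' keys; the fallback above mirrors that shape so the results template
# never breaks.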
# Main processing
def process_text(sentence1, sentence2, model_name, visualization_type, perform_analysis, compare_dataset):
    # compare_dataset is accepted for the UI checkbox but is not used yet.
    outputs = []
    score, score_text, fig = get_similarity(sentence1, sentence2, model_name, visualization_type)
    outputs.extend([score_text, fig])
    analysis = analyze_text(sentence1, sentence2) if perform_analysis else ""
    outputs.append(analysis)
    paraphrase_result = detect_paraphrase(score)
    outputs.append(paraphrase_result)
    ner1 = extract_entities(sentence1)
    ner2 = extract_entities(sentence2)
    ner_display = f"""
## Named Entities
**Sentence 1:** {', '.join([f'{e[0]} ({e[1]})' for e in ner1]) if ner1 else 'None'}

**Sentence 2:** {', '.join([f'{e[0]} ({e[1]})' for e in ner2]) if ner2 else 'None'}
"""
    outputs.append(ner_display)
    s1_sentiment = get_sentiment(sentence1)
    s2_sentiment = get_sentiment(sentence2)
    senti_display = f"""
## Sentiment Analysis
**Sentence 1:** {s1_sentiment['label']} (score: {s1_sentiment['score']:.2f})

**Sentence 2:** {s2_sentiment['label']} (score: {s2_sentiment['score']:.2f})
"""
    outputs.append(senti_display)
    para1 = generate_paraphrases(sentence1)
    para2 = generate_paraphrases(sentence2)
    para_text = f"""
## Paraphrase Suggestions
**Sentence 1:**
- {para1[0]}
- {para1[1]}

**Sentence 2:**
- {para2[0]}
- {para2[1]}
"""
    outputs.append(para_text)
    # POS Tagging
    pos1 = get_pos_tags(sentence1)
    pos2 = get_pos_tags(sentence2)
    pos_text = f"""
## Part-of-Speech (POS) Tags
**Sentence 1:**
{', '.join([f"{word} ({pos})" for word, pos in pos1])}

**Sentence 2:**
{', '.join([f"{word} ({pos})" for word, pos in pos2])}
"""
    outputs.append(pos_text)
    outputs.append(plot_pos_tags(sentence1, sentence2))
    outputs.append("✅ Your input has been submitted! Please check the 📊 Results tab.")
    return outputs
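# process_text() builds its outputs in the same order as the `outputs=` list
# wired to submit_btn.click() below; the two must be kept in sync.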
# Models
models = [
    'all-MiniLM-L6-v2',
    'paraphrase-multilingual-MiniLM-L12-v2',
    'paraphrase-MiniLM-L3-v2',
    'distilbert-base-nli-mean-tokens'
]
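# All four are sentence-transformers checkpoints resolved by name from the
# Hugging Face Hub; each is downloaded on first use and then cached.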
# Gradio UI
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🧪 SEMA: Semantic Evaluation & Matching Analyzer")
    gr.Markdown("Explore sentence meaning, similarity, and more.")
    with gr.Tabs():
        with gr.Tab("📝 Input"):
            sentence1 = gr.Textbox(label="Sentence 1", lines=4)
            sentence2 = gr.Textbox(label="Sentence 2", lines=4)
            model_name = gr.Dropdown(choices=models, value=models[0], label="Model")
            visualization_type = gr.Radio(["Bar Chart", "Gauge", "Heatmap"], value="Gauge", label="Visualization")
            perform_analysis = gr.Checkbox(label="Extra Text Analysis", value=True)
            compare_dataset = gr.Checkbox(label="Compare with Dataset", value=False)
            submit_btn = gr.Button("Run Analysis")
            status_msg = gr.Textbox(label="Status", interactive=False)
        with gr.Tab("📊 Results"):
            sim_result = gr.Textbox(label="Similarity Score", interactive=False)
            vis_output = gr.Plot(label="Visualization")
            para_result = gr.Textbox(label="Paraphrase Detection", interactive=False)
        with gr.Tab("🔬 Deep Insights"):
            with gr.Accordion("📊 Text Statistics", open=True):
                stats_output = gr.Markdown()
            with gr.Accordion("🧠 Named Entity Recognition", open=False):
                ner_output = gr.Markdown()
            with gr.Accordion("💬 Sentiment Analysis", open=False):
                sentiment_output = gr.Markdown()
            with gr.Accordion("🔄 Paraphrase Suggestions", open=False):
                para_output = gr.Markdown()
            with gr.Accordion("🧾 POS Tagging", open=False):
                pos_output = gr.Markdown()
                pos_plot_output = gr.Plot()
    gr.Examples([
        ["The sky is blue.", "The sky has a beautiful blue color."],
        ["What is your name?", "Can you tell me your name?"]
    ], inputs=[sentence1, sentence2])
    submit_btn.click(
        fn=process_text,
        inputs=[sentence1, sentence2, model_name, visualization_type, perform_analysis, compare_dataset],
        outputs=[
            sim_result,
            vis_output,
            stats_output,
            para_result,
            ner_output,
            sentiment_output,
            para_output,
            pos_output,
            pos_plot_output,
            status_msg
        ]
    )

demo.launch()
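# Running this file directly starts the Gradio server; on a Hugging Face Space
# the same launch() call is used, with gradio, torch, transformers,
# sentence-transformers, spacy (plus the en_core_web_trf package), matplotlib
# and pandas expected to be declared as dependencies (e.g. in requirements.txt).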