Spaces:

namfam
/

Ling

Running

App Files Files Community

Nam Fam committed 13 days ago

Commit

ea99abb

1 Parent(s): a1a0e26

update files

Browse files

Files changed (34) hide show

.dockerignore +58 -0
.gitignore +62 -0
Dockerfile +34 -0
app.py +72 -0
llms.py +54 -0
requirements.txt +13 -0
tasks/__init__.py +0 -0
tasks/classification.py +59 -0
tasks/extraction.py +7 -0
tasks/grammar_checking.py +27 -0
tasks/intent_detection.py +54 -0
tasks/knowledge_graph.py +272 -0
tasks/ner.py +148 -0
tasks/pos_tagging.py +178 -0
tasks/retrieval.py +7 -0
tasks/segmentation.py +7 -0
tasks/sentiment_analysis.py +48 -0
tasks/summarization.py +41 -0
tasks/topic_classification.py +53 -0
tasks/translation.py +43 -0
ui/grammar_ui.py +56 -0
ui/intent_ui.py +83 -0
ui/kg_ui.py +231 -0
ui/ner_ui.py +358 -0
ui/ner_ui.py.new +362 -0
ui/pos_ui.py +297 -0
ui/sentiment_ui.py +108 -0
ui/summarization_ui.py +101 -0
ui/topic_ui.py +108 -0
ui/translation_ui.py +122 -0
utils/ner_helpers.py +88 -0
utils/pos_helpers.py +38 -0
utils/remote_client.py +42 -0
utils/shared.py +1 -0

.dockerignore ADDED Viewed

	@@ -0,0 +1,58 @@

+# Git
+.git
+.gitignore
+# Virtual Environment
+.venv
+venv/
+env/
+# IDE
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.pytest_cache/
+.mypy_cache/
+# Build and distribution
+build/
+dist/
+*.egg-info/
+# Local development
+*.local
+# Environment files (except example)
+.env
+!.env.example
+# Logs and databases
+*.log
+*.sqlite
+*.sqlite3
+# OS generated files
+.DS_Store
+.DS_Store?
+._*
+.Spotlight-V100
+.Trashes
+ehthumbs.db
+Thumbs.db
+# Project specific exclusions
+modal_inference/
+tests/
+api/
+modal_client.py
+*.ipynb
+README1.MD

.gitignore ADDED Viewed

	@@ -0,0 +1,62 @@

+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+# Virtual Environment
+.env
+.venv
+env/
+venv/
+ENV/
+# IDE
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+# OS
+.DS_Store
+Thumbs.db
+# Logs and databases
+*.log
+*.sqlite
+# Local development
+*.local
+# Docker
+data/
+Dockerfile.dev
+# Environment files (except example)
+.env
+!.env.example
+# Project specific
+modal_inference/
+utils/modal_client.py
+api/
+tests/
+*.ipynb
+README1.MD

Dockerfile ADDED Viewed

	@@ -0,0 +1,34 @@

+# Use official Python image as base
+FROM python:3.10-slim
+# Set environment variables
+ENV PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1 \
+    PIP_NO_CACHE_DIR=off \
+    PIP_DISABLE_PIP_VERSION_CHECK=on
+# Set working directory
+WORKDIR /app
+# Install system dependencies
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    && rm -rf /var/lib/apt/lists/*
+# Copy requirements first to leverage Docker cache
+COPY requirements.txt .
+# Install Python dependencies
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy the rest of the application
+COPY . .
+# Expose the port the app runs on
+EXPOSE 7860
+# Set environment variables for Gradio
+ENV GRADIO_SERVER_NAME="0.0.0.0"
+# Command to run the application
+CMD ["python", "app.py"]

app.py ADDED Viewed

	@@ -0,0 +1,72 @@

+import gradio as gr
+from typing import Dict, List, Any, Optional
+from tasks.knowledge_graph import build_knowledge_graph
+# Import all UI components
+from ui.summarization_ui import summarization_ui
+from ui.translation_ui import translation_ui
+from ui.sentiment_ui import sentiment_ui
+from ui.topic_ui import topic_ui
+from ui.ner_ui import ner_ui
+from ui.pos_ui import pos_ui
+from ui.kg_ui import kg_ui
+from ui.intent_ui import intent_ui
+from ui.grammar_ui import grammar_ui
+# UI function wrappers
+def summarization_ui_wrapper():
+    return summarization_ui()
+def translation_ui_wrapper():
+    return translation_ui()
+def sentiment_analysis_ui_wrapper():
+    return sentiment_ui()
+def topic_classification_ui_wrapper():
+    return topic_ui()
+def named_entity_recognition_ui_wrapper():
+    return ner_ui()
+def pos_tagging_ui_wrapper():
+    return pos_ui()
+def extraction_ui():
+    return gr.Markdown("Information Extraction is currently under development.")
+def retrieval_ui():
+    return gr.Markdown("Text Retrieval is currently under development.")
+def grammar_ui_wrapper():
+    return grammar_ui()
+# Assemble the application: one header banner followed by one tab per task.
+# Each UI builder is called inside its ``gr.Tab`` context so the components it
+# creates are attached to that tab; tabs are added in the order written here.
+with gr.Blocks(theme=gr.themes.Ocean(), title="Ling - Text Intelligence") as demo:
+    # Static page header (app name and tagline).
+    gr.HTML('''
+        <div style="text-align:center; padding: 24px 0 12px 0;">
+            <h1 style="font-size:2.5em; margin-bottom:0.2em; color:#0e7490; letter-spacing:2px; font-family:sans-serif;">Ling</h1>
+            <p style="font-size:1.3em; color:#444; margin-bottom:0.2em;">Text Intelligence Platform for Smart Insights</p>
+        </div>
+    ''')
+    with gr.Tab("Summarization"):
+        summarization_ui_wrapper()
+    with gr.Tab("Translation"):
+        translation_ui_wrapper()
+    with gr.Tab("Sentiment Analysis"):
+        sentiment_analysis_ui_wrapper()
+    with gr.Tab("Topic Classification"):
+        topic_classification_ui_wrapper()
+    with gr.Tab("NER"):
+        named_entity_recognition_ui_wrapper()
+    with gr.Tab("POS Tagging"):
+        pos_tagging_ui_wrapper()
+    with gr.Tab("Intent Detection"):
+        intent_ui()
+    with gr.Tab("Grammar Checking"):
+        grammar_ui_wrapper()
+    with gr.Tab("Knowledge Graph"):
+        kg_ui()
+    # Placeholder tab: retrieval_ui() only renders an "under development" note.
+    with gr.Tab("Retrieval"):
+        retrieval_ui()
+# NOTE(review): launch() runs at import time (no __main__ guard), so importing
+# this module starts the server.
+demo.launch()

llms.py ADDED Viewed

	@@ -0,0 +1,54 @@

+from langchain.chat_models import init_chat_model
+from langchain_core.messages import HumanMessage
+from dotenv import load_dotenv
+from typing import List
+from langchain.tools import BaseTool
+from langchain.agents import initialize_agent, AgentType
+_ = load_dotenv()
+class LLM:
+    def __init__(
+        self,
+        model: str = "gemini-2.0-flash",
+        model_provider: str = "google_genai",
+        temperature: float = 0.0,
+        max_tokens: int = 1000
+    ):
+        self.chat_model = init_chat_model(
+            model=model,
+            model_provider=model_provider,
+            temperature=temperature,
+            max_tokens=max_tokens,
+        )
+    def generate(self, prompt: str) -> str:
+        message = HumanMessage(content=prompt)
+        response = self.chat_model.invoke([message])
+        return response.content
+    def bind_tools(self, tools: List[BaseTool], agent_type: AgentType = AgentType.ZERO_SHOT_REACT_DESCRIPTION):
+        """
+        Bind LangChain tools to this model and return an AgentExecutor.
+        """
+        return initialize_agent(
+            tools,
+            self.chat_model,
+            agent=agent_type,
+            verbose=False
+        )
+    def set_temperature(self, temperature: float):
+        """
+        Set the temperature for the chat model.
+        """
+        self.chat_model.temperature = temperature
+    def set_max_tokens(self, max_tokens: int):
+        """
+        Set the maximum number of tokens for the chat model.
+        """
+        self.chat_model.max_tokens = max_tokens

requirements.txt ADDED Viewed

	@@ -0,0 +1,13 @@

+gradio==5.21.0
+spacy==3.7.5
+networkx==3.2.1
+matplotlib==3.8.3
+fastapi==0.115.11
+uvicorn==0.27.0.post1
+pydantic==2.9.2
+langchain-core==0.3.58
+langchain-community==0.3.7
+google-generativeai==0.8.3
+python-dotenv==1.0.1
+pyvis==0.3.2
+ipython

tasks/__init__.py ADDED Viewed

File without changes

tasks/classification.py ADDED Viewed

	@@ -0,0 +1,59 @@

+from llms import LLM
+from utils.remote_client import execute_remote_task
+def text_classification(text: str, model: str, task: str = "topic", candidate_labels=None, custom_instructions: str = "", use_llm: bool = True) -> str:
+    """
+    Classify text using either LLM or traditional (Modal API) method.
+    Args:
+        text: The text to classify
+        model: The model to use
+        task: Either "sentiment" or "topic"
+        candidate_labels: For topic classification, the list of candidate labels
+        custom_instructions: Optional instructions for LLM
+        use_llm: Whether to use LLM or traditional method
+    """
+    if not text.strip():
+        return ""
+    if use_llm:
+        return _classification_with_llm(text, model, task, candidate_labels, custom_instructions)
+    else:
+        return _classification_with_traditional(text, model, candidate_labels)
+def _classification_with_llm(text: str, model: str, task: str, candidate_labels=None, custom_instructions: str = "") -> str:
+    try:
+        llm = LLM(model=model)
+        if task == "sentiment":
+            prompt = (
+                f"Analyze the sentiment of the following text. Return ONLY one value: 'positive', 'negative', or 'neutral'.\n" +
+                (f"{custom_instructions}\n" if custom_instructions else "") +
+                f"Text: {text}\nSentiment:"
+            )
+        else:  # topic classification
+            labels_str = ", ".join(candidate_labels) if candidate_labels else "any appropriate topic"
+            prompt = (
+                f"Classify the following text into ONE of these categories: {labels_str}.\n" +
+                f"Return ONLY the most appropriate category name.\n" +
+                (f"{custom_instructions}\n" if custom_instructions else "") +
+                f"Text: {text}\nCategory:"
+            )
+        result = llm.generate(prompt)
+        return result.strip()
+    except Exception as e:
+        print(f"Error in LLM classification: {str(e)}")
+        return "Oops! Something went wrong. Please try again later."
+def _classification_with_traditional(text: str, model: str, labels=None) -> str:
+    try:
+        payload = {"text": text, "model": model}
+        if labels is not None:
+            payload["labels"] = labels
+        resp = execute_remote_task("classification", payload)
+        if "error" in resp:
+            return "Oops! Something went wrong. Please try again later."
+        return resp.get("labels", "")
+    except Exception as e:
+        print(f"Error in traditional classification: {str(e)}")
+        return "Oops! Something went wrong. Please try again later."

tasks/extraction.py ADDED Viewed

	@@ -0,0 +1,7 @@

+from utils.remote_client import execute_remote_task
+def information_extraction(text: str, model: str) -> str:
+    resp = execute_remote_task("extraction", {"text": text, "model": model})
+    if "error" in resp:
+        return "Oops! Something went wrong. Please try again later."
+    return resp.get("entities", "")

tasks/grammar_checking.py ADDED Viewed

	@@ -0,0 +1,27 @@

+from typing import Optional
+from llms import LLM
+def grammar_checking(text: str, model: str, custom_instructions: Optional[str] = None, use_llm: bool = True) -> str:
+    """Grammar and spelling correction using LLM or traditional method"""
+    if not text or not text.strip():
+        return "Please enter input text."
+    if use_llm:
+        return _grammar_checking_with_llm(text, model, custom_instructions)
+    else:
+        return _grammar_checking_with_traditional(text, model)
+def _grammar_checking_with_llm(text: str, model: str, custom_instructions: Optional[str]) -> str:
+    try:
+        llm = LLM(model=model)
+        prompt = (
+            (custom_instructions + "\n") if custom_instructions else ""
+        ) + f"Check and correct grammar and spelling for the following text.\nText: {text}\nCorrected:"
+        result = llm.generate(prompt)
+        return result.strip()
+    except Exception as e:
+        print(f"Error in LLM grammar checking: {str(e)}")
+        return "Oops! Something went wrong. Please try again later."
+def _grammar_checking_with_traditional(text: str, model: str) -> str:
+    # Placeholder for traditional grammar checking (could use LanguageTool or similar)
+    return "[Traditional grammar checking is not implemented. Please use LLM mode.]"

tasks/intent_detection.py ADDED Viewed

	@@ -0,0 +1,54 @@

+from typing import List, Optional
+def intent_detection(
+    text: str,
+    model: str,
+    candidate_intents: Optional[List[str]] = None,
+    custom_instructions: str = "",
+    use_llm: bool = True
+) -> str:
+    if not text or not text.strip():
+        return "Please enter input text."
+    if use_llm:
+        return _intent_detection_with_llm(text, model, candidate_intents, custom_instructions)
+    else:
+        return _intent_detection_with_traditional(text, model, candidate_intents)
+from llms import LLM
+def _intent_detection_with_llm(
+    text: str,
+    model: str,
+    candidate_intents: Optional[List[str]],
+    custom_instructions: str
+) -> str:
+    try:
+        llm = LLM(model=model)
+        if candidate_intents:
+            prompt = (
+                f"Classify the intent of the following text from this list: {', '.join(candidate_intents)}.\n"
+                f"Return ONLY the best intent name.\n"
+                + (f"{custom_instructions}\n" if custom_instructions else "")
+                + f"Text: {text}\nIntent:"
+            )
+        else:
+            prompt = (
+                f"Detect the intent of the following text.\n"
+                f"Return ONLY the intent name, do not explain.\n"
+                + (f"{custom_instructions}\n" if custom_instructions else "")
+                + f"Text: {text}\nIntent:"
+            )
+        result = llm.generate(prompt)
+        return result.strip()
+    except Exception as e:
+        print(f"Error in LLM intent detection: {str(e)}")
+        return "Oops! Something went wrong. Please try again later."
+def _intent_detection_with_traditional(
+    text: str,
+    model: str,
+    candidate_intents: Optional[List[str]]
+) -> str:
+    # TODO: Implement traditional model inference
+    return "[Traditional model intent detection not implemented yet]"

tasks/knowledge_graph.py ADDED Viewed

	@@ -0,0 +1,272 @@

+from typing import List, Dict, Any, Tuple, Optional
+import spacy
+import networkx as nx
+import matplotlib.pyplot as plt
+from io import BytesIO
+import base64
+import re
+import json
+from langchain_core.messages import HumanMessage
+from langchain.chat_models import init_chat_model
+from dotenv import load_dotenv
+import os
+# Interactive visualization
+from pyvis.network import Network
+# Load environment variables
+_ = load_dotenv()
+class LLMKnowledgeGraph:
+    def __init__(self, model: str = "gemini-2.0-flash", model_provider: str = "google_genai"):
+        """Initialize the LLM for knowledge graph generation."""
+        self.llm = init_chat_model(
+            model=model,
+            model_provider=model_provider,
+            temperature=0.1,  # Lower temperature for more deterministic results
+            max_tokens=2000
+        )
+        self.entity_prompt = """
+        Extract all named entities from the following text and categorize them into the following types:
+        - PERSON: People, including fictional
+        - ORG: Companies, agencies, institutions, etc.
+        - GPE: Countries, cities, states
+        - DATE: Absolute or relative dates or periods
+        - MONEY: Monetary values
+        - PERCENT: Percentage values
+        - QUANTITY: Measurements, weights, distances
+        - EVENT: Named hurricanes, battles, wars, sports events, etc.
+        - WORK_OF_ART: Titles of books, songs, etc.
+        - LAW: Legal document titles
+        - LANGUAGE: Any named language
+        Return the entities in JSON format with the following structure:
+        [
+            {"text": "entity text", "label": "ENTITY_TYPE", "start": character_start, "end": character_end}
+        ]
+        Text: """
+        self.relation_prompt = """
+        Analyze the following text and extract relationships between entities in the form of subject-relation-object triples.
+        For each relation, provide:
+        - The subject (entity that is the source of the relation)
+        - The relation type (e.g., 'works at', 'located in', 'part of')
+        - The object (entity that is the target of the relation)
+        Return the relations in JSON format with the following structure:
+        [
+            {"subject": "subject text", "relation": "relation type", "object": "object text"}
+        ]
+        Text: """
+    def extract_entities_with_llm(self, text: str) -> List[Dict[str, Any]]:
+        """Extract entities from text using LLM."""
+        try:
+            response = self.llm.invoke([HumanMessage(content=self.entity_prompt + text)])
+            # Handle case where response might be a string or a message object
+            if hasattr(response, 'content'):
+                content = response.content
+            else:
+                content = str(response)
+            # Clean the response to ensure it's valid JSON
+            content = content.strip()
+            if content.startswith('```json'):
+                content = content[content.find('['):content.rfind(']')+1]
+            elif content.startswith('['):
+                content = content[:content.rfind(']')+1]
+            entities = json.loads(content)
+            return entities
+        except Exception as e:
+            print(f"Error extracting entities with LLM: {str(e)}")
+            print(f"Response content: {getattr(response, 'content', str(response))}")
+            return []
+    def extract_relations_with_llm(self, text: str) -> List[Dict[str, str]]:
+        """Extract relations between entities using LLM."""
+        try:
+            response = self.llm.invoke([HumanMessage(content=self.relation_prompt + text)])
+            # Handle case where response might be a string or a message object
+            if hasattr(response, 'content'):
+                content = response.content
+            else:
+                content = str(response)
+            # Clean the response to ensure it's valid JSON
+            content = content.strip()
+            if content.startswith('```json'):
+                content = content[content.find('['):content.rfind(']')+1]
+            elif content.startswith('['):
+                content = content[:content.rfind(']')+1]
+            relations = json.loads(content)
+            return relations
+        except Exception as e:
+            print(f"Error extracting relations with LLM: {str(e)}")
+            print(f"Response content: {getattr(response, 'content', str(response))}")
+            return []
+def extract_relations(text: str, model_name: str = "gemini-2.0-flash", use_llm: bool = True) -> Dict[str, Any]:
+    """
+    Extract entities and their relations from text to build a knowledge graph.
+    Args:
+        text: Input text to process
+        model_name: Name of the model to use (spaCy model or LLM)
+        use_llm: Whether to use LLM for relation extraction (default: True)
+    Returns:
+        Dictionary containing nodes and edges for the knowledge graph
+    """
+    if use_llm:
+        # Use LLM for both entity and relation extraction
+        kg_extractor = LLMKnowledgeGraph(model=model_name)
+        # Extract entities using LLM
+        entities = kg_extractor.extract_entities_with_llm(text)
+        # Extract relations using LLM
+        relations = kg_extractor.extract_relations_with_llm(text)
+    else:
+        # Fallback to spaCy for entity and relation extraction
+        try:
+            nlp = spacy.load(model_name)
+        except OSError:
+            # If model is not found, download it
+            import subprocess
+            import sys
+            subprocess.check_call([sys.executable, "-m", "spacy", "download", model_name])
+            nlp = spacy.load(model_name)
+        # Process the text
+        doc = nlp(text)
+        # Extract entities
+        entities = [{"text": ent.text, "label": ent.label_, "start": ent.start_char, "end": ent.end_char}
+                   for ent in doc.ents]
+        # Extract relations (subject-verb-object)
+        relations = []
+        for sent in doc.sents:
+            for token in sent:
+                if token.dep_ in ("ROOT", "nsubj", "dobj"):
+                    subj = ""
+                    obj = ""
+                    relation = ""
+                    # Find subject
+                    if token.dep_ == "nsubj" and token.head.pos_ == "VERB":
+                        subj = token.text
+                        relation = token.head.lemma_
+                        # Find object
+                        for child in token.head.children:
+                            if child.dep_ == "dobj":
+                                obj = child.text
+                                break
+                    if subj and obj and relation:
+                        relations.append({
+                            "subject": subj,
+                            "relation": relation,
+                            "object": obj
+                        })
+    return {
+        "entities": entities,
+        "relations": relations
+    }
+def build_nx_graph(entities: List[Dict], relations: List[Dict]) -> nx.DiGraph:
+    """Build a NetworkX DiGraph from entities and relations. Ensure all nodes have a 'label'."""
+    G = nx.DiGraph()
+    # Add entities as nodes
+    for entity in entities:
+        label = entity.get("label") or entity.get("type") or "ENTITY"
+        text = entity.get("text") or entity.get("word")
+        G.add_node(text, label=label, type="entity")
+    # Add edges and ensure nodes exist with label
+    for rel in relations:
+        subj = rel.get("subject")
+        obj = rel.get("object")
+        rel_label = rel.get("relation", "related_to")
+        if subj is not None and subj not in G:
+            G.add_node(subj, label="ENTITY", type="entity")
+        if obj is not None and obj not in G:
+            G.add_node(obj, label="ENTITY", type="entity")
+        G.add_edge(subj, obj, label=rel_label)
+    return G
+def visualize_knowledge_graph(entities: List[Dict], relations: List[Dict]) -> str:
+    """
+    Generate a static PNG visualization of the knowledge graph, returned as base64 string for HTML embedding.
+    """
+    G = build_nx_graph(entities, relations)
+    plt.figure(figsize=(12, 8))
+    pos = nx.spring_layout(G, k=0.5, iterations=50)
+    # Color nodes by entity type
+    entity_types = list(set([d.get('label', 'ENTITY') for n, d in G.nodes(data=True)]))
+    color_map = {etype: plt.cm.tab20(i % 20) for i, etype in enumerate(entity_types)}
+    node_colors = [color_map[d.get('label', 'ENTITY')] for n, d in G.nodes(data=True)]
+    nx.draw_networkx_nodes(G, pos, node_size=2000, node_color=node_colors, alpha=0.8)
+    nx.draw_networkx_edges(G, pos, edge_color='gray', arrows=True, arrowsize=20)
+    nx.draw_networkx_labels(G, pos, font_size=10, font_weight='bold')
+    edge_labels = {(u, v): d['label'] for u, v, d in G.edges(data=True)}
+    nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_size=8)
+    buf = BytesIO()
+    plt.savefig(buf, format='png', bbox_inches='tight')
+    plt.close()
+    img_str = base64.b64encode(buf.getvalue()).decode('utf-8')
+    return f"data:image/png;base64,{img_str}"
+def visualize_knowledge_graph_interactive(entities: List[Dict], relations: List[Dict]) -> str:
+    """
+    Generate an interactive HTML visualization of the knowledge graph using pyvis.
+    Returns HTML as a string for embedding in Gradio or web UI.
+    """
+    G = build_nx_graph(entities, relations)
+    net = Network(height="600px", width="100%", directed=True, notebook=False)
+    # Color map for entity types
+    entity_types = list(set([d.get('label', 'ENTITY') for n, d in G.nodes(data=True)]))
+    color_palette = ["#e3f2fd", "#e8f5e9", "#fff8e1", "#f3e5f5", "#e8eaf6", "#e0f7fa", "#f1f8e9", "#fce4ec", "#e8f5e9", "#f5f5f5", "#fafafa", "#e1f5fe", "#fff3e0", "#d7ccc8", "#f9fbe7", "#fbe9e7", "#ede7f6", "#e0f2f1"]
+    color_map = {etype: color_palette[i % len(color_palette)] for i, etype in enumerate(entity_types)}
+    for n, d in G.nodes(data=True):
+        label = d.get('label', 'ENTITY')
+        net.add_node(n, label=n, title=f"{n}<br>Type: {label}", color=color_map[label])
+    for u, v, d in G.edges(data=True):
+        net.add_edge(u, v, label=d['label'], title=d['label'])
+    net.set_options('''var options = { "edges": { "arrows": {"to": {"enabled": true}}, "color": {"color": "#888"} }, "nodes": { "font": {"size": 18} }, "physics": { "enabled": true } };''')
+    html_buf = BytesIO()
+    net.write_html(html_buf)
+    html_buf.seek(0)
+    html = html_buf.read().decode('utf-8')
+    # Remove <html>, <body> wrappers to allow embedding in Gradio
+    body_start = html.find('<body>') + len('<body>')
+    body_end = html.find('</body>')
+    body_content = html[body_start:body_end]
+    return body_content
+def build_knowledge_graph(text: str, model_name: str = "gemini-2.0-flash", use_llm: bool = True) -> Dict[str, Any]:
+    """
+    Main function to build a knowledge graph from text.
+    Args:
+        text: Input text to process
+        model_name: Name of the model to use (spaCy model or LLM)
+        use_llm: Whether to use LLM for relation extraction (default: True)
+    Returns:
+        Dictionary containing the knowledge graph data and visualization
+    """
+    # Extract entities and relations
+    result = extract_relations(text, model_name, use_llm)
+    # Generate visualization
+    if result.get("entities") and result.get("relations"):
+        visualization = visualize_knowledge_graph(result["entities"], result["relations"])
+        result["visualization"] = visualization
+    else:
+        result["visualization"] = None
+    return result

tasks/ner.py ADDED Viewed

	@@ -0,0 +1,148 @@

+from typing import Dict, List, Union, Optional
+from llms import LLM
+from dataclasses import dataclass, asdict
+import json
+@dataclass
+class Entity:
+    """One named entity; serialized with ``asdict`` by the LLM-based NER path."""
+    # The entity text as reported by the model.
+    text: str
+    # Entity type label, e.g. "PERSON" or "ORG".
+    type: str
+    # Start and end character positions as reported by the model.
+    start: int
+    end: int
+    # Optional fields; None when the model did not supply them.
+    confidence: Optional[float] = None
+    description: Optional[str] = None
+def named_entity_recognition(
+    text: str,
+    model: str = "gemini-2.0-flash",
+    use_llm: bool = True,
+    entity_types: Optional[List[str]] = None
+) -> Union[str, List[Dict]]:
+    """
+    Perform named entity recognition using either LLM or traditional NER models.
+    Args:
+        text: Input text to analyze
+        model: Model to use for NER
+        use_llm: Whether to use LLM for more accurate but slower NER
+        entity_types: List of entity types to extract (only used with LLM)
+    Returns:
+        List of entities with their types and positions
+    """
+    if not text.strip():
+        return []
+    if use_llm:
+        return _ner_with_llm(text, model, entity_types)
+    else:
+        return _ner_with_traditional(text, model)
+def _ner_with_llm(
+    text: str,
+    model_name: str,
+    entity_types: Optional[List[str]] = None
+) -> List[Dict]:
+    """Use LLM for more accurate and flexible NER."""
+    # Default entity types if none provided
+    if entity_types is None:
+        entity_types = [
+            "PERSON", "ORG", "GPE", "LOC", "PRODUCT", "EVENT",
+            "WORK_OF_ART", "LAW", "LANGUAGE", "DATE", "TIME",
+            "PERCENT", "MONEY", "QUANTITY", "ORDINAL", "CARDINAL"
+        ]
+    # Create the prompt
+    entity_types_str = ", ".join(entity_types)
+    prompt = f"""
+    Extract named entities from the following text and categorize them into these types: {entity_types_str}.
+    For each entity, provide:
+    - The entity text
+    - The entity type (from the list above)
+    - The start and end character positions
+    - (Optional) A brief description of the entity
+    - (Optional) Confidence score (0-1)
+    Return the entities as a JSON array of objects with these fields:
+    - text: The entity text
+    - type: The entity type
+    - start: Start character position
+    - end: End character position
+    - description: (Optional) Brief description
+    - confidence: (Optional) Confidence score (0-1)
+    Text: """ + text + """
+    JSON response (only the array, no other text):
+    ["""
+    try:
+        # Initialize LLM
+        llm = LLM(model=model_name, temperature=0.1)
+        # Get response from LLM
+        response = llm.generate(prompt)
+        # Clean and parse the response
+        response = response.strip()
+        if response.startswith('```json'):
+            response = response[response.find('['):response.rfind(']')+1]
+        elif response.startswith('['):
+            response = response[:response.rfind(']')+1]
+        entities = json.loads(response)
+        # Convert to Entity objects and validate
+        valid_entities = []
+        for ent in entities:
+            try:
+                entity = Entity(
+                    text=ent['text'],
+                    type=ent['type'],
+                    start=int(ent['start']),
+                    end=int(ent['end']),
+                    confidence=ent.get('confidence'),
+                    description=ent.get('description')
+                )
+                valid_entities.append(asdict(entity))
+            except (KeyError, ValueError) as e:
+                print(f"Error parsing entity: {e}")
+                continue
+        return valid_entities
+    except Exception as e:
+        print(f"Error in LLM NER: {str(e)}")
+        # Fall back to traditional NER if LLM fails
+        return _ner_with_traditional(text, "en_core_web_md")
+def _ner_with_traditional(text: str, model: str) -> List[Dict]:
+    """Fallback to traditional NER models."""
+    try:
+        import spacy
+        # Load the appropriate model
+        if model == "en_core_web_sm" or model == "en_core_web_md" or model == "en_core_web_lg":
+            nlp = spacy.load(model)
+        else:
+            nlp = spacy.load("en_core_web_md")
+        # Process the text
+        doc = nlp(text)
+        # Convert to our entity format
+        entities = []
+        for ent in doc.ents:
+            entities.append({
+                'text': ent.text,
+                'type': ent.label_,
+                'start': ent.start_char,
+                'end': ent.end_char,
+                'confidence': 1.0  # Traditional NER doesn't provide confidence
+            })
+        return entities
+    except Exception as e:
+        print(f"Error in traditional NER: {str(e)}")
+        return []

tasks/pos_tagging.py ADDED Viewed

	@@ -0,0 +1,178 @@

+from typing import Dict, List, Union, Optional
+from llms import LLM
+import json
+import re
+def pos_tagging(
+    text: str,
+    model: str = "en_core_web_sm",
+    use_llm: bool = False,
+    custom_instructions: str = ""
+) -> Dict[str, List[Union[str, List[str]]]]:
+    """
+    Perform Part-of-Speech tagging on the input text using either LLM or traditional models.
+    Args:
+        text: The input text to tag
+        model: The model to use for tagging (e.g., 'en_core_web_sm', 'gpt-4', 'gemini-pro')
+        use_llm: Whether to use LLM for more accurate but slower POS tagging
+        custom_instructions: Custom instructions for LLM-based tagging
+    Returns:
+        A dictionary containing 'tokens' and 'tags' lists
+    """
+    if not text.strip():
+        return {"tokens": [], "tags": []}
+    if use_llm:
+        return _pos_tagging_with_llm(text, model, custom_instructions)
+    else:
+        return _pos_tagging_traditional(text, model)
+def _extract_json_array(text: str) -> str:
+    """Extract JSON array from text, handling various formats."""
+    import re
+    # Try to find JSON array pattern
+    json_match = re.search(r'\[\s*\{.*\}\s*\]', text, re.DOTALL)
+    if json_match:
+        return json_match.group(0)
+    # If not found, try to find array between square brackets
+    start = text.find('[')
+    end = text.rfind(']')
+    if start >= 0 and end > start:
+        return text[start:end+1]
+    return text
+def _repair_loose_json(json_str: str) -> str:
+    """Best-effort repair of Python-style dict syntax: single quotes and bare keys."""
+    repaired = json_str.replace("'", '"')
+    # Quote only keys that are still bare, i.e. sit directly after '{' or ','.
+    # Quoting every `word:` would turn an already valid "token": into ""token"":.
+    return re.sub(r'([{,]\s*)([A-Za-z_]\w*)(\s*:)', r'\1"\2"\3', repaired)
+def _pos_tagging_with_llm(
+    text: str,
+    model_name: str,
+    custom_instructions: str = ""
+) -> Dict[str, List[str]]:
+    """
+    Tag ``text`` by prompting an LLM and parsing its JSON answer.
+    Args:
+        text: The input text to tag
+        model_name: Model identifier handed to ``LLM``
+        custom_instructions: Optional text placed in front of the built-in prompt
+    Returns:
+        A dictionary with parallel 'tokens' and 'tags' lists. If anything goes wrong
+        (LLM error, empty or unparseable answer, no usable items) the error is printed
+        and the result of ``_pos_tagging_traditional(text, "en_core_web_sm")`` is
+        returned instead.
+    """
+    # The legend uses CCONJ: that is the UD v2 name for coordinating conjunctions
+    # (CONJ is the retired v1 name), matching the "standard UD tags" request.
+    prompt = """Analyze the following text and provide Part-of-Speech (POS) tags for each token.
+Return the result as a JSON array of objects with 'token' and 'tag' keys.
+Use standard Universal Dependencies POS tags:
+- ADJ: adjective
+- ADP: adposition
+- ADV: adverb
+- AUX: auxiliary verb
+- CCONJ: coordinating conjunction
+- DET: determiner
+- INTJ: interjection
+- NOUN: noun
+- NUM: numeral
+- PART: particle
+- PRON: pronoun
+- PROPN: proper noun
+- PUNCT: punctuation
+- SCONJ: subordinating conjunction
+- SYM: symbol
+- VERB: verb
+- X: other
+Example output format:
+[
+  {"token": "Hello", "tag": "INTJ"},
+  {"token": "world", "tag": "NOUN"},
+  {"token": ".", "tag": "PUNCT"}
+]
+Text to analyze:
+"""
+    if custom_instructions:
+        prompt = f"{custom_instructions}\n\n{prompt}"
+    prompt += f'"{text}"'
+    try:
+        # Initialize LLM with lower temperature for more deterministic output
+        llm = LLM(model=model_name, temperature=0.1, max_tokens=2000)
+        response = llm.generate(prompt)
+        print(f"LLM Raw Response: {response[:500]}...")  # Log first 500 chars
+        if not response.strip():
+            raise ValueError("Empty response from LLM")
+        # Strip any prose or code fences around the array
+        json_str = _extract_json_array(response)
+        if not json_str:
+            raise ValueError("No JSON array found in response")
+        try:
+            pos_tags = json.loads(json_str)
+        except json.JSONDecodeError:
+            # Second chance on loosely formatted output; if this also fails the
+            # error propagates to the fallback handler below.
+            pos_tags = json.loads(_repair_loose_json(json_str))
+        if not isinstance(pos_tags, list):
+            raise ValueError(f"Expected list, got {type(pos_tags).__name__}")
+        tokens = []
+        tags = []
+        for item in pos_tags:
+            if not isinstance(item, dict):
+                continue
+            token = item.get('token', '')
+            tag = item.get('tag', '')
+            if token and tag:  # Only add if both token and tag are non-empty
+                tokens.append(str(token).strip())
+                tags.append(str(tag).strip())
+        if not tokens or not tags:
+            raise ValueError("No valid tokens and tags found in response")
+        return {
+            'tokens': tokens,
+            'tags': tags
+        }
+    except Exception as e:
+        # Deliberate catch-all: any failure on the LLM path degrades to spaCy tagging.
+        print(f"Error in LLM POS tagging: {str(e)}")
+        print("Falling back to traditional POS tagging...")
+        return _pos_tagging_traditional(text, "en_core_web_sm")
+def _pos_tagging_traditional(text: str, model: str) -> Dict[str, List[str]]:
+    """
+    Tag ``text`` with a spaCy pipeline.
+    Args:
+        text: The input text to tag
+        model: Name passed to ``spacy.load``; "en_core_web_sm" is loaded instead
+            when loading ``model`` raises OSError
+    Returns:
+        A dictionary with 'tokens' (token texts) and 'tags' (each token's ``pos_``).
+        Any exception is printed and yields empty lists.
+    """
+    try:
+        # Imported lazily, inside the try, so a missing spaCy install is reported
+        # through the same error path as every other failure.
+        import spacy
+        try:
+            pipeline = spacy.load(model)
+        except OSError:
+            # OSError is treated as "requested model not available".
+            pipeline = spacy.load("en_core_web_sm")
+        parsed = pipeline(text)
+        return {
+            'tokens': [tok.text for tok in parsed],
+            'tags': [tok.pos_ for tok in parsed]
+        }
+    except Exception as e:
+        print(f"Error in traditional POS tagging: {str(e)}")
+        return {"tokens": [], "tags": []}

tasks/retrieval.py ADDED Viewed

	@@ -0,0 +1,7 @@

+from utils.remote_client import execute_remote_task
+def text_retrieval(query: str, model: str, documents: list) -> str:
+    """
+    Run the remote "retrieval" task and return the "matches" field of its response.
+    Returns a generic apology message when the response contains an "error" key,
+    and "" when the response has no "matches" key.
+    """
+    payload = {"query": query, "model": model, "documents": documents}
+    response = execute_remote_task("retrieval", payload)
+    if "error" not in response:
+        return response.get("matches", "")
+    return "Oops! Something went wrong. Please try again later."

tasks/segmentation.py ADDED Viewed

	@@ -0,0 +1,7 @@

+from utils.remote_client import execute_remote_task
+def text_segmentation(text: str, model: str = "") -> str:
+    """
+    Run the remote "segmentation" task and return the "segments" field of its response.
+    Returns a generic apology message when the response contains an "error" key,
+    and "" when the response has no "segments" key.
+    """
+    payload = {"text": text, "model": model}
+    response = execute_remote_task("segmentation", payload)
+    if "error" not in response:
+        return response.get("segments", "")
+    return "Oops! Something went wrong. Please try again later."

tasks/sentiment_analysis.py ADDED Viewed

	@@ -0,0 +1,48 @@

+from llms import LLM
+from utils.remote_client import execute_remote_task
+def sentiment_analysis(text: str, model: str, custom_instructions: str = "", use_llm: bool = True) -> str:
+    """
+    Classify the sentiment of ``text`` with the LLM backend or the remote task backend.
+    Args:
+        text: The text to analyze
+        model: Model name forwarded to the selected backend
+        custom_instructions: Extra instructions, used by the LLM backend only
+        use_llm: True routes to the LLM backend, False to the remote task
+    Returns:
+        The backend's answer, or "" when ``text`` is empty or whitespace-only.
+    """
+    # Nothing to analyze: skip both backends.
+    if text.strip() == "":
+        return ""
+    if not use_llm:
+        return _sentiment_with_traditional(text, model)
+    return _sentiment_with_llm(text, model, custom_instructions)
+def _sentiment_with_llm(text: str, model: str, custom_instructions: str = "") -> str:
+    """
+    Ask an LLM for the sentiment of ``text`` and return its stripped answer.
+    Any exception is printed and replaced by a generic apology message.
+    """
+    try:
+        client = LLM(model=model)
+        # Custom instructions, when present, get their own line between the task
+        # statement and the text.
+        extra_line = f"{custom_instructions}\n" if custom_instructions else ""
+        prompt = "".join([
+            "Analyze the sentiment of the following text. Return ONLY one value: 'positive', 'negative', or 'neutral'.\n",
+            extra_line,
+            f"Text: {text}\nSentiment:",
+        ])
+        answer = client.generate(prompt)
+        return answer.strip()
+    except Exception as e:
+        print(f"Error in LLM sentiment analysis: {str(e)}")
+        return "Oops! Something went wrong. Please try again later."
+def _sentiment_with_traditional(text: str, model: str) -> str:
+    """
+    Run the remote "classification" task with task="sentiment" and return its "labels" field.
+    A response containing an "error" key, or any exception (which is printed),
+    yields a generic apology message.
+    """
+    try:
+        request = dict(text=text, model=model, task="sentiment")
+        reply = execute_remote_task("classification", request)
+        if "error" not in reply:
+            return reply.get("labels", "")
+        return "Oops! Something went wrong. Please try again later."
+    except Exception as e:
+        print(f"Error in traditional sentiment analysis: {str(e)}")
+        return "Oops! Something went wrong. Please try again later."

tasks/summarization.py ADDED Viewed

	@@ -0,0 +1,41 @@

+from llms import LLM
+from utils.remote_client import execute_remote_task
+def text_summarization(text: str, model: str, summary_length: str, use_llm: bool = True) -> str:
+    """
+    Summarize ``text`` with the LLM backend or the remote task backend.
+    Args:
+        text: The text to summarize
+        model: Model name forwarded to the selected backend
+        summary_length: Length/detail setting forwarded to the selected backend
+        use_llm: True routes to the LLM backend, False to the remote task
+    Returns:
+        The backend's summary, or "" when ``text`` is empty or whitespace-only.
+    """
+    # Nothing to summarize: skip both backends.
+    if text.strip() == "":
+        return ""
+    if not use_llm:
+        return _summarization_with_traditional(text, model, summary_length)
+    return _summarization_with_llm(text, model, summary_length)
+def _summarization_with_llm(text: str, model: str, summary_length: str) -> str:
+    """
+    Ask an LLM to summarize ``text`` and return its stripped answer.
+    ``summary_length`` is interpolated into the prompt as the requested level of detail.
+    Any exception is printed and replaced by a generic apology message.
+    """
+    try:
+        client = LLM(model=model)
+        instruction = f"Summarize the following text in {summary_length} detail. "
+        prompt = instruction + f"Text: {text}\nSummary:"
+        answer = client.generate(prompt)
+        return answer.strip()
+    except Exception as e:
+        print(f"Error in LLM summarization: {str(e)}")
+        return "Oops! Something went wrong. Please try again later."
+def _summarization_with_traditional(text: str, model: str, summary_length: str) -> str:
+    """
+    Run the remote "summarization" task and return the "summary" field of its response.
+    A response containing an "error" key, or any exception (which is printed),
+    yields a generic apology message.
+    """
+    try:
+        request = dict(text=text, model=model, summary_length=summary_length)
+        reply = execute_remote_task("summarization", request)
+        if "error" not in reply:
+            return reply.get("summary", "")
+        return "Oops! Something went wrong. Please try again later."
+    except Exception as e:
+        print(f"Error in traditional summarization: {str(e)}")
+        return "Oops! Something went wrong. Please try again later."

tasks/topic_classification.py ADDED Viewed

	@@ -0,0 +1,53 @@

+from llms import LLM
+from utils.remote_client import execute_remote_task
+def topic_classification(text: str, model: str, candidate_labels=None, custom_instructions: str = "", use_llm: bool = True) -> str:
+    """
+    Assign a topic to ``text`` with the LLM backend or the remote task backend.
+    Args:
+        text: The text to classify
+        model: Model name forwarded to the selected backend
+        candidate_labels: Candidate topics/categories, forwarded to the selected backend
+        custom_instructions: Extra instructions, used by the LLM backend only
+        use_llm: True routes to the LLM backend, False to the remote task
+    Returns:
+        The backend's answer, or "" when ``text`` is empty or whitespace-only.
+    """
+    # Nothing to classify: skip both backends.
+    if text.strip() == "":
+        return ""
+    if not use_llm:
+        return _topic_classification_with_traditional(text, model, candidate_labels)
+    return _topic_classification_with_llm(text, model, candidate_labels, custom_instructions)
+def _topic_classification_with_llm(text: str, model: str, candidate_labels=None, custom_instructions: str = "") -> str:
+    """
+    Ask an LLM to pick one category for ``text`` and return its stripped answer.
+    When ``candidate_labels`` is empty or None the prompt allows "any appropriate topic".
+    Any exception is printed and replaced by a generic apology message.
+    """
+    try:
+        client = LLM(model=model)
+        if candidate_labels:
+            allowed = ", ".join(candidate_labels)
+        else:
+            allowed = "any appropriate topic"
+        # Custom instructions, when present, get their own line before the text.
+        extra_line = f"{custom_instructions}\n" if custom_instructions else ""
+        prompt = "".join([
+            f"Classify the following text into ONE of these categories: {allowed}.\n",
+            "Return ONLY the most appropriate category name.\n",
+            extra_line,
+            f"Text: {text}\nCategory:",
+        ])
+        answer = client.generate(prompt)
+        return answer.strip()
+    except Exception as e:
+        print(f"Error in LLM topic classification: {str(e)}")
+        return "Oops! Something went wrong. Please try again later."
+def _topic_classification_with_traditional(text: str, model: str, labels=None) -> str:
+    """
+    Run the remote "classification" task with task="topic" and return its "labels" field.
+    ``labels`` is added to the request only when it is not None.
+    A response containing an "error" key, or any exception (which is printed),
+    yields a generic apology message.
+    """
+    try:
+        request = dict(text=text, model=model, task="topic")
+        if labels is not None:
+            request["labels"] = labels
+        reply = execute_remote_task("classification", request)
+        if "error" not in reply:
+            return reply.get("labels", "")
+        return "Oops! Something went wrong. Please try again later."
+    except Exception as e:
+        print(f"Error in traditional topic classification: {str(e)}")
+        return "Oops! Something went wrong. Please try again later."

tasks/translation.py ADDED Viewed

	@@ -0,0 +1,43 @@

+from llms import LLM
+from utils.remote_client import execute_remote_task
+def text_translation(text: str, model: str, src_lang: str, tgt_lang: str, custom_instructions: str = "", use_llm: bool = True) -> str:
+    """
+    Translate ``text`` with the LLM backend or the remote task backend.
+    Args:
+        text: The text to translate
+        model: Model name forwarded to the selected backend
+        src_lang: Source language, forwarded to the selected backend
+        tgt_lang: Target language, forwarded to the selected backend
+        custom_instructions: Extra instructions, used by the LLM backend only
+        use_llm: True routes to the LLM backend, False to the remote task
+    Returns:
+        The backend's translation, or "" when ``text`` is empty or whitespace-only.
+    """
+    # Nothing to translate: skip both backends.
+    if text.strip() == "":
+        return ""
+    if not use_llm:
+        return _translation_with_traditional(text, model, src_lang, tgt_lang)
+    return _translation_with_llm(text, model, src_lang, tgt_lang, custom_instructions)
+def _translation_with_llm(text: str, model: str, src_lang: str, tgt_lang: str, custom_instructions: str = "") -> str:
+    """
+    Ask an LLM to translate ``text`` from ``src_lang`` to ``tgt_lang`` and return its stripped answer.
+    Any exception is printed and replaced by a generic apology message.
+    """
+    try:
+        client = LLM(model=model)
+        # Custom instructions, when present, get their own line before the text.
+        extra_line = f"{custom_instructions}\n" if custom_instructions else ""
+        prompt = "".join([
+            f"Translate the following text from {src_lang} to {tgt_lang}.\n",
+            extra_line,
+            f"Text: {text}\nTranslation:",
+        ])
+        answer = client.generate(prompt)
+        return answer.strip()
+    except Exception as e:
+        print(f"Error in LLM translation: {str(e)}")
+        return "Oops! Something went wrong. Please try again later."
+def _translation_with_traditional(text: str, model: str, src_lang: str, tgt_lang: str) -> str:
+    """
+    Run the remote "translation" task and return the "translation" field of its response.
+    A response containing an "error" key, or any exception (which is printed),
+    yields a generic apology message.
+    """
+    try:
+        request = dict(text=text, model=model, src_lang=src_lang, tgt_lang=tgt_lang)
+        reply = execute_remote_task("translation", request)
+        if "error" not in reply:
+            return reply.get("translation", "")
+        return "Oops! Something went wrong. Please try again later."
+    except Exception as e:
+        print(f"Error in traditional translation: {str(e)}")
+        return "Oops! Something went wrong. Please try again later."

ui/grammar_ui.py ADDED Viewed

	@@ -0,0 +1,56 @@

+import gradio as gr
+from tasks.grammar_checking import grammar_checking
+GRAMMAR_MODELS = ["gemini-2.0-flash"]
+DEFAULT_MODEL = "gemini-2.0-flash"
+def grammar_ui():
+    """
+    Build the grammar/spelling-check panel: a two-column row with inputs on the
+    left and the corrected text on the right, wired to ``grammar_checking``.
+    NOTE(review): presumably called inside an active gr.Blocks()/tab context by
+    the app — confirm against the caller.
+    """
+    with gr.Row():
+        # Left column: text input, examples, action button, model and instructions.
+        with gr.Column(scale=1):
+            input_text = gr.Textbox(
+                label="Input Text",
+                lines=6,
+                placeholder="Enter text to check grammar and spelling...",
+                elem_id="grammar-input-text"
+            )
+            gr.Examples(
+                examples=[
+                    ["This is a smple sentence with errrors."],
+                    ["I has went to the store yesterday."]
+                ],
+                inputs=[input_text],
+                label="Examples"
+            )
+            check_btn = gr.Button("Check Grammar & Spelling", variant="primary")
+            model_dropdown = gr.Dropdown(
+                GRAMMAR_MODELS,
+                value=DEFAULT_MODEL,
+                label="Model",
+                interactive=True,
+                elem_id="grammar-model-dropdown"
+            )
+            custom_instructions = gr.Textbox(
+                label="Custom Instructions (optional)",
+                lines=2,
+                placeholder="Add any custom instructions for the model...",
+                elem_id="grammar-custom-instructions"
+            )
+        # Right column: read-only result.
+        with gr.Column(scale=1):
+            output_box = gr.Textbox(
+                label="Corrected Text",
+                lines=3,
+                interactive=False,
+                elem_id="grammar-output"
+            )
+        # Click handler: this panel always uses the LLM path (use_llm=True).
+        def run_grammar_checking(text, model, custom_instructions):
+            return grammar_checking(
+                text=text,
+                model=model,
+                custom_instructions=custom_instructions,
+                use_llm=True
+            )
+        check_btn.click(
+            run_grammar_checking,
+            inputs=[input_text, model_dropdown, custom_instructions],
+            outputs=output_box
+        )

ui/intent_ui.py ADDED Viewed

	@@ -0,0 +1,83 @@

+import gradio as gr
+from tasks.intent_detection import intent_detection
+DEFAULT_MODEL = "gemini-2.0-flash"
+INTENT_MODELS = [DEFAULT_MODEL]
+DEFAULT_INTENTS = ["book_flight", "order_food", "check_weather", "greeting", "goodbye"]
+def intent_ui():
+    """
+    Build the intent-detection panel: a two-column row with inputs on the left and
+    the detected intent on the right, wired to ``intent_detection``.
+    NOTE(review): presumably called inside an active gr.Blocks()/tab context by
+    the app — confirm against the caller.
+    """
+    with gr.Row():
+        # Left column: input/config
+        with gr.Column(scale=1):
+            input_text = gr.Textbox(
+                label="Input Text",
+                lines=6,
+                placeholder="Enter text to detect intent...",
+                elem_id="intent-input-text"
+            )
+            gr.Examples(
+                examples=[
+                    ["I want to book a flight to Paris next week."],
+                    ["Can you tell me what the weather is like in Hanoi?"]
+                ],
+                inputs=[input_text],
+                label="Examples"
+            )
+            use_custom_intents = gr.Checkbox(
+                label="Use custom intents",
+                value=True,
+                elem_id="intent-use-custom-intents"
+            )
+            intents_area = gr.TextArea(
+                label="Candidate Intents (one per line)",
+                value='\n'.join(DEFAULT_INTENTS),
+                lines=5,
+                visible=True,
+                elem_id="intent-candidate-intents"
+            )
+            # Show the candidate-intents box only while the checkbox is ticked.
+            def toggle_intent_area(use_custom):
+                return gr.update(visible=use_custom)
+            use_custom_intents.change(toggle_intent_area, inputs=use_custom_intents, outputs=intents_area)
+            detect_btn = gr.Button("Detect Intent", variant="primary")
+            model_dropdown = gr.Dropdown(
+                INTENT_MODELS,
+                value=DEFAULT_MODEL,
+                label="Model",
+                interactive=True,
+                elem_id="intent-model-dropdown"
+            )
+            custom_instructions = gr.Textbox(
+                label="Custom Instructions (optional)",
+                lines=2,
+                placeholder="Add any custom instructions for the model...",
+                elem_id="intent-custom-instructions"
+            )
+        # Right column: output/result
+        with gr.Column(scale=1):
+            output_box = gr.Textbox(
+                label="Detected Intent",
+                lines=1,
+                interactive=False,
+                elem_id="intent-output"
+            )
+#             gr.Markdown("""
+# **Instructions:**
+# - Enter your text and (optionally) custom intent labels.
+# - Use the checkbox to switch between custom intent list or LLM auto-detect mode.
+# - Add any custom instructions for the LLM if needed.
+# """)
+        # Logic for button
+        def run_intent_detection(text, model, use_custom, intents, custom_instructions):
+            # One intent per non-blank line; None (no candidate list) when the
+            # checkbox is off. This panel always uses the LLM path (use_llm=True).
+            candidate_intents = [s.strip() for s in intents.split("\n") if s.strip()] if use_custom else None
+            return intent_detection(
+                text=text,
+                model=model,
+                candidate_intents=candidate_intents,
+                custom_instructions=custom_instructions,
+                use_llm=True
+            )
+        detect_btn.click(
+            run_intent_detection,
+            inputs=[input_text, model_dropdown, use_custom_intents, intents_area, custom_instructions],
+            outputs=output_box
+        )

ui/kg_ui.py ADDED Viewed

	@@ -0,0 +1,231 @@

+import gradio as gr
+import pandas as pd
+from utils.ner_helpers import is_llm_model
+from typing import Dict, List, Any, Tuple
+from tasks.knowledge_graph import build_knowledge_graph, visualize_knowledge_graph_interactive
+import base64
+from io import BytesIO
+def kg_ui():
+    """
+    Knowledge Graph UI component.
+    Builds the input column, the tabbed results column (graph, entities,
+    relationships) and the interactive-graph toggle, then wires the button to
+    ``build_kg``. Returns None.
+    NOTE(review): presumably called inside an active gr.Blocks()/tab context by
+    the app — confirm against the caller.
+    """
+    # Define models
+    KG_MODELS = [
+        "gemini-2.0-flash",
+        "gpt-4",
+        "claude-2",
+        "en_core_web_sm",
+        "en_core_web_md"
+    ]
+    DEFAULT_MODEL = "gemini-2.0-flash"
+    def build_kg(text, model, custom_instructions, interactive=False):
+        """
+        Process text for knowledge graph generation.
+        Returns a 9-tuple: (visualization_html, entities_df, relations_df,
+        viz_vis, no_viz_vis, entities_vis, no_entities_vis, relations_vis,
+        no_relations_vis) — the three payloads followed by visibility flags for
+        each result widget and its "nothing to show" placeholder.
+        NOTE(review): ``custom_instructions`` is accepted but never passed to
+        ``build_knowledge_graph`` below.
+        """
+        # NOTE(review): redundant with the module-level ``import gradio as gr``.
+        import gradio as gr
+        if not text.strip():
+            # Return empty values for all tabs. viz_vis is False here, so the
+            # "No text provided" HTML goes to a component that stays hidden.
+            return (
+                "<div style='text-align: center; color: #666; padding: 20px;'>No text provided</div>",
+                pd.DataFrame(),
+                pd.DataFrame(),
+                False, True, False, True, False, True
+            )
+        use_llm = is_llm_model(model)
+        result = build_knowledge_graph(
+            text=text,
+            model_name=model,
+            use_llm=use_llm
+        )
+        entities = result.get("entities", [])
+        relations = result.get("relations", [])
+        visualization = result.get("visualization")
+        # DataFrames
+        if entities:
+            entities_df = pd.DataFrame(entities)
+            # rename() ignores keys that are absent, so columns with other names
+            # pass through unchanged.
+            # NOTE(review): assumes entity dicts use "text"/"label"/"start"/"end" —
+            # verify against build_knowledge_graph's output.
+            entities_df = entities_df.rename(columns={
+                "text": "Entity",
+                "label": "Type",
+                "start": "Start Position",
+                "end": "End Position"
+            })
+        else:
+            entities_df = pd.DataFrame()
+        if relations:
+            relations_df = pd.DataFrame(relations)
+            relations_df = relations_df.rename(columns={
+                "subject": "Subject",
+                "relation": "Relation",
+                "object": "Object"
+            })
+        else:
+            relations_df = pd.DataFrame()
+        # Visualization: interactive graph when requested and both entities and
+        # relations exist; otherwise the static image from the result, if any.
+        if interactive and entities and relations:
+            try:
+                interactive_html = visualize_knowledge_graph_interactive(entities, relations)
+                visualization_html = f"<div style='width:100%;overflow-x:auto'>{interactive_html}</div>"
+                viz_vis = True
+                no_viz_vis = False
+            except Exception as e:
+                # Show the rendering error in place of the graph.
+                visualization_html = f"<div style='color:#d32f2f;padding:20px;'>Error rendering interactive graph: {e}</div>"
+                viz_vis = True
+                no_viz_vis = False
+        elif visualization:
+            # ``visualization`` is embedded as the base64 payload of a PNG data URI.
+            visualization_html = f"<img src='data:image/png;base64,{visualization}' style='max-width:100%;height:auto;'/>"
+            viz_vis = True
+            no_viz_vis = False
+        else:
+            visualization_html = ""
+            viz_vis = False
+            no_viz_vis = True
+        # Visibility flags
+        entities_vis = not entities_df.empty
+        no_entities_vis = not entities_vis
+        relations_vis = not relations_df.empty
+        no_relations_vis = not relations_vis
+        # Return
+        return (
+            visualization_html,
+            entities_df,
+            relations_df,
+            viz_vis,
+            no_viz_vis,
+            entities_vis,
+            no_entities_vis,
+            relations_vis,
+            no_relations_vis
+        )
+    # UI Components
+    with gr.Row():
+        with gr.Column(scale=2):
+            input_text = gr.Textbox(
+                label="Input Text",
+                lines=8,
+                placeholder="Enter text to extract knowledge graph...",
+                elem_id="kg-input-text"
+            )
+            gr.Examples(
+                examples=[
+                    ["Elon Musk founded SpaceX and Tesla in the United States."],
+                    ["Amazon acquired Whole Foods in 2017."]
+                ],
+                inputs=[input_text],
+                label="Examples"
+            )
+            # Model selection
+            model = gr.Dropdown(
+                KG_MODELS,
+                value=DEFAULT_MODEL,
+                label="Model",
+                interactive=True,
+                elem_id="kg-model-dropdown"
+            )
+            with gr.Accordion("Advanced Options", open=False, elem_id="kg-advanced-options"):
+                custom_instructions = gr.Textbox(
+                    label="Custom Instructions",
+                    lines=2,
+                    placeholder="(Optional) Add specific instructions for knowledge graph generation...",
+                    elem_id="kg-custom-instructions"
+                )
+            btn = gr.Button("Generate Knowledge Graph", elem_id="kg-btn")
+        with gr.Column(scale=3):
+            # Results container with tabs. Each tab pairs a placeholder (visible
+            # initially) with the real widget (hidden initially).
+            with gr.Tabs() as output_tabs:
+                with gr.Tab("Graph Visualization", id="kg-viz-tab"):
+                    no_viz_html = gr.HTML(
+                        "<div style='text-align: center; color: #666; padding: 20px;'>"
+                        "Generate a knowledge graph to visualize relationships.</div>",
+                        visible=True,
+                        elem_id="kg-no-viz"
+                    )
+                    viz_html = gr.HTML(
+                        label="Knowledge Graph Visualization",
+                        visible=False,
+                        elem_id="kg-viz-html"
+                    )
+                with gr.Tab("Entities", id="kg-entities-tab"):
+                    no_entities_html = gr.HTML(
+                        "<div style='text-align: center; color: #666; padding: 20px;'>"
+                        "No entities found. Try generating a knowledge graph first.</div>",
+                        visible=True,
+                        elem_id="kg-no-entities"
+                    )
+                    entities_table = gr.DataFrame(
+                        headers=["Entity", "Type", "Start Position", "End Position"],
+                        datatype=["str", "str", "number", "number"],
+                        visible=False,
+                        elem_id="kg-entities-table"
+                    )
+                with gr.Tab("Relationships", id="kg-relations-tab"):
+                    no_relations_html = gr.HTML(
+                        "<div style='text-align: center; color: #666; padding: 20px;'>"
+                        "No relationships found. Try generating a knowledge graph first.</div>",
+                        visible=True,
+                        elem_id="kg-no-relations"
+                    )
+                    relations_table = gr.DataFrame(
+                        headers=["Subject", "Relation", "Object"],
+                        datatype=["str", "str", "str"],
+                        visible=False,
+                        elem_id="kg-relations-table"
+                    )
+            with gr.Accordion("About Knowledge Graphs", open=False):
+                gr.Markdown("""
+                ## Knowledge Graphs
+                Knowledge graphs represent relationships between entities in text as a network. This tool:
+                - **Extracts entities**: Identifies people, places, organizations, and concepts
+                - **Maps relationships**: Shows how entities are connected to each other
+                - **Visualizes connections**: Creates an interactive graph you can explore
+                ### How it works
+                - **LLM models** can understand complex relationships in text
+                - **Traditional models** use pattern matching and syntactic parsing
+                Knowledge graphs are particularly useful for:
+                - Research and analysis
+                - Content exploration
+                - Understanding complex narratives
+                - Discovering hidden connections
+                Try it with news articles, scientific papers, or story excerpts to see different types of relationships.
+                """)
+    # Toggle for interactive/static visualization
+    with gr.Row():
+        interactive_toggle = gr.Checkbox(
+            label="Interactive Graph (pyvis)",
+            value=True,
+            elem_id="kg-interactive-toggle"
+        )
+    # Event handler: use build_kg for all outputs
+    # NOTE(review): not referenced by the button wiring below, which calls
+    # build_kg through a lambda instead — looks like dead code; confirm.
+    def process_and_update_ui(text, model, custom_instructions, interactive):
+        return build_kg(text, model, custom_instructions, interactive)
+    # Wire button to unified handler
+    # Converts build_kg's 9-tuple into six updates, ordered to match the
+    # ``outputs`` list of btn.click: the three result widgets first, then the
+    # three placeholders.
+    def gradio_output_adapter(visualization_html, entities_df, relations_df, viz_vis, no_viz_vis, entities_vis, no_entities_vis, relations_vis, no_relations_vis):
+        return [
+            gr.update(value=visualization_html, visible=viz_vis),
+            gr.update(value=entities_df, visible=entities_vis),
+            gr.update(value=relations_df, visible=relations_vis),
+            gr.update(visible=no_viz_vis),
+            gr.update(visible=no_entities_vis),
+            gr.update(visible=no_relations_vis),
+        ]
+    btn.click(
+        fn=lambda text, model, custom_instructions, interactive: gradio_output_adapter(*build_kg(text, model, custom_instructions, interactive)),
+        inputs=[input_text, model, custom_instructions, interactive_toggle],
+        outputs=[
+            viz_html, entities_table, relations_table,
+            no_viz_html, no_entities_html, no_relations_html,
+        ]
+    )
+    return None

ui/ner_ui.py ADDED Viewed

	@@ -0,0 +1,358 @@

+import gradio as gr
+from typing import Dict, List, Any
+import pandas as pd
+import json
+import re
+import html as html_lib
+from tasks.ner import named_entity_recognition
+from utils.ner_helpers import NER_ENTITY_TYPES, DEFAULT_SELECTED_ENTITIES, is_llm_model
+# The ner_ui function and related logic moved from app.py
+def ner_ui():
+    # Default entity types for the multi-select
+    DEFAULT_ENTITY_TYPES = list(NER_ENTITY_TYPES.keys())
+    def ner(text: str, model: str, entity_types: List[str]) -> Dict[str, Any]:
+        """Extract named entities, automatically using LLM for supported models."""
+        if not text.strip():
+            return {"text": "", "entities": []}
+        try:
+            use_llm = is_llm_model(model)
+            # Call the enhanced NER function
+            entities = named_entity_recognition(
+                text=text,
+                model=model,
+                use_llm=use_llm,
+                entity_types=entity_types if use_llm else None
+            )
+            # Convert to the format expected by the UI
+            if not isinstance(entities, list):
+                entities = []
+            if not use_llm and entity_types:
+                entities = [e for e in entities if e.get("type", "") in entity_types or e.get("entity", "") in entity_types]
+            return {
+                "entities": [
+                    {
+                        "entity": e.get("type", ""),
+                        "word": e.get("text", ""),
+                        "start": e.get("start", 0),
+                        "end": e.get("end", 0),
+                        "score": e.get("confidence", 1.0),
+                        "description": e.get("description", "")
+                    }
+                    for e in entities
+                ]
+            }
+        except Exception as e:
+            print(f"Error in NER: {str(e)}")
+            return {"entities": []}
+    def render_ner_html(text, entities):
+        # COMPLETELY REVISED APPROACH: Clean inline display of entities with proper positioning
+        if not text.strip() or not entities:
+            return "<div style='text-align: center; color: #666; padding: 20px;'>No named entities found in the text.</div>"
+        COLORS = [
+            '#e3f2fd', '#e8f5e9', '#fff8e1', '#f3e5f5', '#e8eaf6', '#e0f7fa',
+            '#f1f8e9', '#fce4ec', '#e8f5e9', '#f5f5f5', '#fafafa', '#e1f5fe',
+            '#fff3e0', '#d7ccc8', '#f9fbe7', '#fbe9e7', '#ede7f6', '#e0f2f1'
+        ]
+        # Clean up entities and extract necessary data
+        clean_entities = []
+        label_colors = {}
+        for ent in entities:
+            # Extract label
+            label = ent.get('type') or ent.get('entity')
+            if not label:
+                continue  # Skip entities without label
+            # Extract text
+            entity_text = ent.get('text') or ent.get('word')
+            if not entity_text:
+                continue  # Skip entities without text
+            # Get positions if available
+            start = ent.get('start', -1)
+            end = ent.get('end', -1)
+            # Verify that entity text matches the span in the original text
+            # This ensures positions are correct
+            if start >= 0 and end > start and end <= len(text):
+                span_text = text[start:end]
+                if entity_text != span_text and not text[start:end].strip().startswith(entity_text):
+                    # Try to find the entity in the text if position doesn't match
+                    found = False
+                    for match in re.finditer(re.escape(entity_text), text):
+                        if not found:
+                            start = match.start()
+                            end = match.end()
+                            found = True
+            else:
+                # Try to find the entity in the text if no position information
+                found = False
+                for match in re.finditer(re.escape(entity_text), text):
+                    if not found:
+                        start = match.start()
+                        end = match.end()
+                        found = True
+            # Assign color based on label
+            if label not in label_colors:
+                label_colors[label] = COLORS[len(label_colors) % len(COLORS)]
+            clean_entities.append({
+                'text': entity_text,
+                'label': label,
+                'color': label_colors[label],
+                'start': start,
+                'end': end
+            })
+        # Sort entities by position (important for proper rendering)
+        clean_entities.sort(key=lambda x: x['start'])
+        # Check for overlapping entities and resolve conflicts
+        non_overlapping = []
+        if clean_entities:
+            non_overlapping.append(clean_entities[0])
+            for i in range(1, len(clean_entities)):
+                current = clean_entities[i]
+                prev = non_overlapping[-1]
+                # Check if current entity overlaps with previous one
+                if current['start'] < prev['end']:
+                    # Skip overlapping entity to avoid confusion
+                    continue
+                else:
+                    non_overlapping.append(current)
+        # Generate HTML with proper inline highlighting
+        html = ["<div class='ner-highlight' style='line-height:1.6;padding:15px;border:1px solid #e0e0e0;border-radius:4px;background:#f9f9f9;white-space:pre-wrap;'>"]
+        # Process text sequentially with entity markers
+        last_pos = 0
+        for entity in non_overlapping:
+            start = entity['start']
+            end = entity['end']
+            # Add text before entity
+            if start > last_pos:
+                html.append(html_lib.escape(text[last_pos:start]))
+            # Add the entity with its label (with spacing between entity and label)
+            html.append(f"<span style='background:{entity['color']};border-radius:3px;padding:2px 4px;margin:0 1px;border:1px solid rgba(0,0,0,0.1);'>")
+            html.append(f"{html_lib.escape(entity['text'])} ")
+            html.append(f"<span style='font-size:0.8em;font-weight:bold;color:#555;border-radius:2px;padding:0 2px;background:rgba(255,255,255,0.7);'>{html_lib.escape(entity['label'])}</span>")
+            html.append("</span>")
+            # Update position
+            last_pos = end
+        # Add any remaining text
+        if last_pos < len(text):
+            html.append(html_lib.escape(text[last_pos:]))
+        html.append("</div>")
+        return "".join(html)
+    def update_ui(model_id: str) -> Dict:
+        """Update the UI based on the selected model."""
+        use_llm = is_llm_model(model_id)
+        return {
+            entity_types_group: gr.Group(visible=use_llm)
+        }
+    with gr.Row():
+        with gr.Column(scale=2):
+            input_text = gr.Textbox(
+                label="Input Text",
+                lines=8,
+                placeholder="Enter text to analyze for named entities..."
+            )
+            gr.Examples(
+                examples=[
+                    ["Barack Obama was born in Hawaii and became the 44th President of the United States."],
+                    ["Google is headquartered in Mountain View, California."]
+                ],
+                inputs=[input_text],
+                label="Examples"
+            )
+            model_dropdown = gr.Dropdown(
+                ["gemini-2.0-flash"],  # Only allow gemini-2.0-flash for now
+                value="gemini-2.0-flash",
+                label="Model"
+            )
+            with gr.Group() as entity_types_group:
+                entity_types = gr.CheckboxGroup(
+                    label="Entity Types to Extract",
+                    choices=DEFAULT_ENTITY_TYPES,
+                    value=DEFAULT_SELECTED_ENTITIES,
+                    interactive=True
+                )
+                with gr.Row():
+                    select_all_btn = gr.Button("Select All", size="sm")
+                    clear_all_btn = gr.Button("Clear All", size="sm")
+            btn = gr.Button("Extract Entities", variant="primary")
+            # Button handlers for entity selection
+            def select_all_entities():
+                return gr.CheckboxGroup(value=DEFAULT_ENTITY_TYPES)
+            def clear_all_entities():
+                return gr.CheckboxGroup(value=[])
+            select_all_btn.click(
+                fn=select_all_entities,
+                outputs=[entity_types]
+            )
+            clear_all_btn.click(
+                fn=clear_all_entities,
+                outputs=[entity_types]
+            )
+        with gr.Column(scale=3):
+            # Output with tabs
+            with gr.Tabs() as output_tabs:
+                with gr.Tab("Tagged View", id="tagged-view-ner"):
+                    no_results_html = gr.HTML(
+                        "<div style='text-align: center; color: #666; padding: 20px;'>"
+                        "Enter text and click 'Extract Entities' to get results.</div>",
+                        visible=True
+                    )
+                    output_html = gr.HTML(
+                        label="NER Highlighted",
+                        elem_id="ner-output-html",
+                        visible=False
+                    )
+                    # Add CSS for NER tags (scoped to this component)
+                    gr.HTML("""
+                    <style>
+                    #ner-output-html .pos-highlight {
+                        white-space: pre-wrap;
+                        line-height: 1.8;
+                        font-size: 14px;
+                        padding: 15px;
+                        border: 1px solid #e0e0e0;
+                        border-radius: 4px;
+                        background: #f9f9f9;
+                    }
+                    #ner-output-html .pos-token {
+                        display: inline-block;
+                        margin: 0 2px 4px 0;
+                        vertical-align: top;
+                        text-align: center;
+                    }
+                    #ner-output-html .token-text {
+                        display: block;
+                        padding: 2px 8px;
+                        background: #f0f4f8;
+                        border-radius: 4px 4px 0 0;
+                        border: 1px solid #dbe4ed;
+                        border-bottom: none;
+                        font-size: 0.9em;
+                    }
+                    #ner-output-html .pos-tag {
+                        display: block;
+                        padding: 2px 8px;
+                        border-radius: 0 0 4px 4px;
+                    #ner-output-html .WORK_OF_ART { background-color: #f1f8e9; border-color: #dcedc8; color: #33691e; }
+                    #ner-output-html .LAW { background-color: #fce4ec; border-color: #f8bbd0; color: #880e4f; }
+                    #ner-output-html .LANGUAGE { background-color: #e8f5e9; border-color: #c8e6c9; color: #1b5e20; font-weight: bold; }
+                    #ner-output-html .DATE { background-color: #f5f5f5; border-color: #e0e0e0; color: #424242; }
+                    #ner-output-html .TIME { background-color: #fafafa; border-color: #f5f5f5; color: #616161; }
+                    #ner-output-html .PERCENT { background-color: #e1f5fe; border-color: #b3e5fc; color: #01579b; font-weight: bold; }
+                    #ner-output-html .MONEY { background-color: #f3e5f5; border-color: #e1bee7; color: #6a1b9a; }
+                    #ner-output-html .QUANTITY { background-color: #f1f8e9; border-color: #dcedc8; color: #33691e; font-style: italic; }
+                    #ner-output-html .ORDINAL { background-color: #fff3e0; border-color: #ffe0b2; color: #e65100; }
+                    #ner-output-html .CARDINAL { background-color: #ede7f6; border-color: #d1c4e9; color: #4527a0; }
+                    </style>
+                    """)
+                with gr.Tab("Table View", id="table-view-ner"):
+                    no_results_table = gr.HTML(
+                        "<div style='text-align: center; color: #666; padding: 20px;'>"
+                        "Enter text and click 'Extract Entities' to get results.</div>",
+                        visible=True
+                    )
+                    output_table = gr.Dataframe(
+                        label="Extracted Entities",
+                        headers=["Type", "Text", "Confidence", "Description"],
+                        datatype=["str", "str", "number", "str"],
+                        interactive=False,
+                        wrap=True,
+                        visible=False
+                    )
+    # Update the UI when the model changes
+    model_dropdown.change(
+        fn=update_ui,
+        inputs=[model_dropdown],
+        outputs=[entity_types_group]
+    )
+    def process_and_show_results(text: str, model: str, entity_types: List[str]):
+        """Process NER and return both the results and UI state"""
+        if not text.strip():
+            msg = "<div style='text-align: center; color: #f44336; padding: 20px;'>Please enter some text to analyze.</div>"
+            return [
+                gr.HTML(visible=False),  # output_html
+                gr.HTML(msg, visible=True),  # no_results_html
+                gr.DataFrame(visible=False),  # output_table
+                gr.HTML(msg, visible=True)   # no_results_table
+            ]
+        if not entity_types:
+            entity_types = list(NER_ENTITY_TYPES.keys())
+        result = ner(text, model, entity_types)
+        entities = result["entities"] if result and "entities" in result else []
+        # DataFrame for table view
+        if entities:
+            df = pd.DataFrame(entities)
+            if not df.empty:
+                df = df.rename(columns={
+                    "entity": "Type",
+                    "word": "Text",
+                    "score": "Confidence",
+                    "description": "Description"
+                })
+                display_columns = ["Type", "Text", "Confidence", "Description"]
+                df = df[[col for col in display_columns if col in df.columns]]
+                if 'start' in df.columns:
+                    df = df.sort_values('start')
+                html = render_ner_html(text, entities)
+                return [
+                    gr.HTML(html, visible=True),  # output_html
+                    gr.HTML(visible=False),       # no_results_html
+                    gr.DataFrame(value=df, visible=True),  # output_table
+                    gr.HTML(visible=False)        # no_results_table
+                ]
+        # No entities found
+        msg = "<div style='text-align: center; color: #666; padding: 20px;'>No named entities found in the text.</div>"
+        return [
+            gr.HTML(msg, visible=True),   # output_html
+            gr.HTML(visible=False),       # no_results_html
+            gr.DataFrame(visible=False),  # output_table
+            gr.HTML(msg, visible=True)    # no_results_table
+        ]
+    # Set up the button click handler
+    btn.click(
+        fn=process_and_show_results,
+        inputs=[input_text, model_dropdown, entity_types],
+        outputs=[output_html, no_results_html, output_table, no_results_table]
+    )
+    # Initial UI update
+    update_ui(model_dropdown.value)
+    return None

ui/ner_ui.py.new ADDED Viewed

	@@ -0,0 +1,362 @@

+import gradio as gr
+from typing import Dict, List, Any
+import pandas as pd
+import json
+import re
+import html as html_lib
+from tasks.ner import named_entity_recognition
+from utils.ner_helpers import NER_ENTITY_TYPES, DEFAULT_SELECTED_ENTITIES, is_llm_model
+# The ner_ui function and related logic moved from app.py
+def ner_ui():
+    # Default entity types for the multi-select
+    DEFAULT_ENTITY_TYPES = list(NER_ENTITY_TYPES.keys())
+    def ner(text: str, model: str, entity_types: List[str]) -> Dict[str, Any]:
+        """Extract named entities, automatically using LLM for supported models."""
+        if not text.strip():
+            return {"text": "", "entities": []}
+        try:
+            use_llm = is_llm_model(model)
+            # Call the enhanced NER function
+            entities = named_entity_recognition(
+                text=text,
+                model=model,
+                use_llm=use_llm,
+                entity_types=entity_types if use_llm else None
+            )
+            # Convert to the format expected by the UI
+            if not isinstance(entities, list):
+                entities = []
+            if not use_llm and entity_types:
+                entities = [e for e in entities if e.get("type", "") in entity_types or e.get("entity", "") in entity_types]
+            return {
+                "entities": [
+                    {
+                        "entity": e.get("type", ""),
+                        "word": e.get("text", ""),
+                        "start": e.get("start", 0),
+                        "end": e.get("end", 0),
+                        "score": e.get("confidence", 1.0),
+                        "description": e.get("description", "")
+                    }
+                    for e in entities
+                ]
+            }
+        except Exception as e:
+            print(f"Error in NER: {str(e)}")
+            return {"entities": []}
+    def render_ner_html(text, entities):
+        # COMPLETELY REVISED APPROACH: Clean inline display of entities with proper positioning
+        if not text.strip() or not entities:
+            return "<div style='text-align: center; color: #666; padding: 20px;'>No named entities found in the text.</div>"
+        COLORS = [
+            '#e3f2fd', '#e8f5e9', '#fff8e1', '#f3e5f5', '#e8eaf6', '#e0f7fa',
+            '#f1f8e9', '#fce4ec', '#e8f5e9', '#f5f5f5', '#fafafa', '#e1f5fe',
+            '#fff3e0', '#d7ccc8', '#f9fbe7', '#fbe9e7', '#ede7f6', '#e0f2f1'
+        ]
+        # Clean up entities and extract necessary data
+        clean_entities = []
+        label_colors = {}
+        for ent in entities:
+            # Extract label
+            label = ent.get('type') or ent.get('entity')
+            if not label:
+                continue  # Skip entities without label
+            # Extract text
+            entity_text = ent.get('text') or ent.get('word')
+            if not entity_text:
+                continue  # Skip entities without text
+            # Get positions if available
+            start = ent.get('start', -1)
+            end = ent.get('end', -1)
+            # Verify that entity text matches the span in the original text
+            # This ensures positions are correct
+            if start >= 0 and end > start and end <= len(text):
+                span_text = text[start:end]
+                if entity_text != span_text and not text[start:end].strip().startswith(entity_text):
+                    # Try to find the entity in the text if position doesn't match
+                    found = False
+                    for match in re.finditer(re.escape(entity_text), text):
+                        if not found:
+                            start = match.start()
+                            end = match.end()
+                            found = True
+            else:
+                # Try to find the entity in the text if no position information
+                found = False
+                for match in re.finditer(re.escape(entity_text), text):
+                    if not found:
+                        start = match.start()
+                        end = match.end()
+                        found = True
+            # Assign color based on label
+            if label not in label_colors:
+                label_colors[label] = COLORS[len(label_colors) % len(COLORS)]
+            clean_entities.append({
+                'text': entity_text,
+                'label': label,
+                'color': label_colors[label],
+                'start': start,
+                'end': end
+            })
+        # Sort entities by position (important for proper rendering)
+        clean_entities.sort(key=lambda x: x['start'])
+        # Check for overlapping entities and resolve conflicts
+        non_overlapping = []
+        if clean_entities:
+            non_overlapping.append(clean_entities[0])
+            for i in range(1, len(clean_entities)):
+                current = clean_entities[i]
+                prev = non_overlapping[-1]
+                # Check if current entity overlaps with previous one
+                if current['start'] < prev['end']:
+                    # Skip overlapping entity to avoid confusion
+                    continue
+                else:
+                    non_overlapping.append(current)
+        # Generate HTML with proper inline highlighting
+        html = ["<div class='ner-highlight' style='line-height:1.6;padding:15px;border:1px solid #e0e0e0;border-radius:4px;background:#f9f9f9;white-space:pre-wrap;'>"]
+        # Process text sequentially with entity markers
+        last_pos = 0
+        for entity in non_overlapping:
+            start = entity['start']
+            end = entity['end']
+            # Add text before entity
+            if start > last_pos:
+                html.append(html_lib.escape(text[last_pos:start]))
+            # Add the entity with its label (with spacing between entity and label)
+            html.append(f"<span style='background:{entity['color']};border-radius:3px;padding:2px 4px;margin:0 1px;border:1px solid rgba(0,0,0,0.1);'>")
+            html.append(f"{html_lib.escape(entity['text'])} ")
+            html.append(f"<span style='font-size:0.8em;font-weight:bold;color:#555;border-radius:2px;padding:0 2px;background:rgba(255,255,255,0.7);'>{html_lib.escape(entity['label'])}</span>")
+            html.append("</span>")
+            # Update position
+            last_pos = end
+        # Add any remaining text
+        if last_pos < len(text):
+            html.append(html_lib.escape(text[last_pos:]))
+        html.append("</div>")
+        return "".join(html)
+    def update_ui(model_id: str) -> Dict:
+        """Update the UI based on the selected model."""
+        use_llm = is_llm_model(model_id)
+        return {
+            entity_types_group: gr.Group(visible=use_llm)
+        }
+    with gr.Row():
+        with gr.Column(scale=2):
+            input_text = gr.Textbox(
+                label="Input Text",
+                lines=8,
+                placeholder="Enter text to analyze for named entities..."
+            )
+            model_dropdown = gr.Dropdown(
+                ["gemini-2.0-flash", "gpt-4", "claude-2", "en_core_web_sm", "en_core_web_md", "en_core_web_lg"],
+                value="gemini-2.0-flash",
+                label="Model"
+            )
+            with gr.Group() as entity_types_group:
+                entity_types = gr.CheckboxGroup(
+                    label="Entity Types to Extract",
+                    choices=DEFAULT_ENTITY_TYPES,
+                    value=DEFAULT_SELECTED_ENTITIES,
+                    interactive=True
+                )
+                with gr.Row():
+                    select_all_btn = gr.Button("Select All", size="sm")
+                    clear_all_btn = gr.Button("Clear All", size="sm")
+            btn = gr.Button("Extract Entities", variant="primary")
+            # Button handlers for entity selection
+            def select_all_entities():
+                return gr.CheckboxGroup(value=DEFAULT_ENTITY_TYPES)
+            def clear_all_entities():
+                return gr.CheckboxGroup(value=[])
+            select_all_btn.click(
+                fn=select_all_entities,
+                outputs=[entity_types]
+            )
+            clear_all_btn.click(
+                fn=clear_all_entities,
+                outputs=[entity_types]
+            )
+        with gr.Column(scale=3):
+            # Output with tabs
+            with gr.Tabs() as output_tabs:
+                with gr.Tab("Tagged View", id="tagged-view-ner"):
+                    no_results_html = gr.HTML(
+                        "<div style='text-align: center; color: #666; padding: 20px;'>"
+                        "Enter text and click 'Extract Entities' to get results.</div>",
+                        visible=True
+                    )
+                    output_html = gr.HTML(
+                        label="NER Highlighted",
+                        elem_id="ner-output-html",
+                        visible=False
+                    )
+                    # Add CSS for NER tags (scoped to this component)
+                    gr.HTML("""
+                    <style>
+                    #ner-output-html .pos-highlight {
+                        white-space: pre-wrap;
+                        line-height: 1.8;
+                        font-size: 14px;
+                        padding: 15px;
+                        border: 1px solid #e0e0e0;
+                        border-radius: 4px;
+                        background: #f9f9f9;
+                    }
+                    #ner-output-html .pos-token {
+                        display: inline-block;
+                        margin: 0 2px 4px 0;
+                        vertical-align: top;
+                        text-align: center;
+                    }
+                    #ner-output-html .token-text {
+                        display: block;
+                        padding: 2px 8px;
+                        background: #f0f4f8;
+                        border-radius: 4px 4px 0 0;
+                        border: 1px solid #dbe4ed;
+                        border-bottom: none;
+                        font-size: 0.9em;
+                    }
+                    #ner-output-html .pos-tag {
+                        display: block;
+                        padding: 2px 8px;
+                        border-radius: 0 0 4px 4px;
+                        font-size: 0.8em;
+                        font-family: 'Courier New', monospace;
+                        border: 1px solid;
+                        border-top: none;
+                    }
+                    /* Example color coding for common NER labels (customize as needed) */
+                    #ner-output-html .PERSON { background-color: #e3f2fd; border-color: #bbdefb; color: #0d47a1; }
+                    #ner-output-html .ORG { background-color: #e8f5e9; border-color: #c8e6c9; color: #1b5e20; }
+                    #ner-output-html .GPE { background-color: #fff8e1; border-color: #ffecb3; color: #ff6f00; }
+                    #ner-output-html .LOC { background-color: #f3e5f5; border-color: #e1bee7; color: #4a148c; }
+                    #ner-output-html .PRODUCT { background-color: #e8eaf6; border-color: #c5cae9; color: #1a237e; }
+                    #ner-output-html .EVENT { background-color: #e0f7fa; border-color: #b2ebf2; color: #006064; }
+                    #ner-output-html .WORK_OF_ART { background-color: #f1f8e9; border-color: #dcedc8; color: #33691e; }
+                    #ner-output-html .LAW { background-color: #fce4ec; border-color: #f8bbd0; color: #880e4f; }
+                    #ner-output-html .LANGUAGE { background-color: #e8f5e9; border-color: #c8e6c9; color: #1b5e20; font-weight: bold; }
+                    #ner-output-html .DATE { background-color: #f5f5f5; border-color: #e0e0e0; color: #424242; }
+                    #ner-output-html .TIME { background-color: #fafafa; border-color: #f5f5f5; color: #616161; }
+                    #ner-output-html .PERCENT { background-color: #e1f5fe; border-color: #b3e5fc; color: #01579b; font-weight: bold; }
+                    #ner-output-html .MONEY { background-color: #f3e5f5; border-color: #e1bee7; color: #6a1b9a; }
+                    #ner-output-html .QUANTITY { background-color: #f1f8e9; border-color: #dcedc8; color: #33691e; font-style: italic; }
+                    #ner-output-html .ORDINAL { background-color: #fff3e0; border-color: #ffe0b2; color: #e65100; }
+                    #ner-output-html .CARDINAL { background-color: #ede7f6; border-color: #d1c4e9; color: #4527a0; }
+                    </style>
+                    """)
+                with gr.Tab("Table View", id="table-view-ner"):
+                    no_results_table = gr.HTML(
+                        "<div style='text-align: center; color: #666; padding: 20px;'>"
+                        "Enter text and click 'Extract Entities' to get results.</div>",
+                        visible=True
+                    )
+                    output_table = gr.Dataframe(
+                        label="Extracted Entities",
+                        headers=["Type", "Text", "Confidence", "Description"],
+                        datatype=["str", "str", "number", "str"],
+                        interactive=False,
+                        wrap=True,
+                        visible=False
+                    )
+    # Update the UI when the model changes
+    model_dropdown.change(
+        fn=update_ui,
+        inputs=[model_dropdown],
+        outputs=[entity_types_group]
+    )
+    def process_and_show_results(text: str, model: str, entity_types: List[str]):
+        """Process NER and return both the results and UI state"""
+        if not text.strip():
+            msg = "<div style='text-align: center; color: #f44336; padding: 20px;'>Please enter some text to analyze.</div>"
+            return [
+                gr.HTML(visible=False),  # output_html
+                gr.HTML(msg, visible=True),  # no_results_html
+                gr.DataFrame(visible=False),  # output_table
+                gr.HTML(msg, visible=True)   # no_results_table
+            ]
+        if not entity_types:
+            entity_types = list(NER_ENTITY_TYPES.keys())
+        result = ner(text, model, entity_types)
+        entities = result["entities"] if result and "entities" in result else []
+        # DataFrame for table view
+        if entities:
+            df = pd.DataFrame(entities)
+            if not df.empty:
+                df = df.rename(columns={
+                    "entity": "Type",
+                    "word": "Text",
+                    "score": "Confidence",
+                    "description": "Description"
+                })
+                display_columns = ["Type", "Text", "Confidence", "Description"]
+                df = df[[col for col in display_columns if col in df.columns]]
+                if 'start' in df.columns:
+                    df = df.sort_values('start')
+                html = render_ner_html(text, entities)
+                return [
+                    gr.HTML(html, visible=True),  # output_html
+                    gr.HTML(visible=False),       # no_results_html
+                    gr.DataFrame(value=df, visible=True),  # output_table
+                    gr.HTML(visible=False)        # no_results_table
+                ]
+        # No entities found
+        msg = "<div style='text-align: center; color: #666; padding: 20px;'>No named entities found in the text.</div>"
+        return [
+            gr.HTML(msg, visible=True),   # output_html
+            gr.HTML(visible=False),       # no_results_html
+            gr.DataFrame(visible=False),  # output_table
+            gr.HTML(msg, visible=True)    # no_results_table
+        ]
+    # Set up the button click handler
+    btn.click(
+        fn=process_and_show_results,
+        inputs=[input_text, model_dropdown, entity_types],
+        outputs=[output_html, no_results_html, output_table, no_results_table]
+    )
+    # Initial UI update
+    update_ui(model_dropdown.value)
+    return None

ui/pos_ui.py ADDED Viewed

	@@ -0,0 +1,297 @@

+import gradio as gr
+from utils.ner_helpers import is_llm_model
+import pandas as pd
+import json
+from typing import Dict, List
+from tasks.pos_tagging import pos_tagging
+from utils.pos_helpers import *
+# POS UI
+def pos_ui():
+    # UI Components
+    with gr.Row():
+        with gr.Column(scale=2):
+            input_text = gr.Textbox(
+                label="Input Text",
+                lines=8,
+                placeholder="Enter text to analyze for part-of-speech tags...",
+                elem_id="pos-input-text"
+            )
+            gr.Examples(
+                examples=[
+                    ["The cat is sitting on the mat."],
+                    ["She quickly finished her homework before dinner."]
+                ],
+                inputs=[input_text],
+                label="Examples"
+            )
+            # Tag selection
+            with gr.Group():
+                tag_selection = gr.CheckboxGroup(
+                    label="POS Tags to Display",
+                    # choices=[(f"{tag} - {desc}", tag) for tag, desc in POS_TAG_DESCRIPTIONS.items()],
+                    choices=[tag for tag in POS_TAG_DESCRIPTIONS.keys()],
+                    value=DEFAULT_SELECTED_TAGS,
+                    interactive=True
+                )
+                with gr.Row():
+                    select_all_btn = gr.Button("Select All", size="sm")
+                    clear_all_btn = gr.Button("Clear All", size="sm")
+            # Model selection at the bottom
+            with gr.Row():
+                model_dropdown = gr.Dropdown(
+                    POS_MODELS,
+                    value=DEFAULT_MODEL,
+                    label="Model",
+                    interactive=True,
+                    elem_id="pos-model-dropdown"
+                )
+            custom_instructions = gr.Textbox(
+                label="Custom Instructions (optional)",
+                lines=2,
+                placeholder="Add any custom instructions for the model...",
+                elem_id="pos-custom-instructions"
+            )
+            # Submit button
+            submit_btn = gr.Button("Tag Text", variant="primary", elem_id="pos-submit-btn")
+            # Button event handlers
+            def select_all_tags():
+                return gr.CheckboxGroup(value=DEFAULT_SELECTED_TAGS)
+            def clear_all_tags():
+                return gr.CheckboxGroup(value=[])
+            select_all_btn.click(
+                fn=select_all_tags,
+                outputs=[tag_selection]
+            )
+            clear_all_btn.click(
+                fn=clear_all_tags,
+                outputs=[tag_selection]
+            )
+        with gr.Column(scale=3):
+            # Results container with tabs
+            with gr.Tabs() as output_tabs:
+                with gr.Tab("Tagged View", id="tagged-view"):
+                    no_results_html = gr.HTML(
+                        "<div style='text-align: center; color: #666; padding: 20px;'>"
+                        "Enter text and click 'Tag Text' to analyze.</div>",
+                        visible=True
+                    )
+                    output_html = gr.HTML(
+                        label="POS Tags",
+                        elem_id="pos-output-html",
+                        visible=False
+                    )
+                with gr.Tab("Table View", id="table-view"):
+                    no_results_table = gr.HTML(
+                        "<div style='text-align: center; color: #666; padding: 20px;'>"
+                        "Enter text and click 'Tag Text' to analyze.</div>",
+                        visible=True
+                    )
+                    output_table = gr.Dataframe(
+                        label="POS Tags",
+                        headers=["Token", "POS Tag"],
+                        datatype=["str", "str"],
+                        interactive=False,
+                        wrap=True,
+                        elem_id="pos-output-table",
+                        visible=False
+                    )
+                # Add CSS for the POS tags (scoped to this component)
+                gr.HTML("""
+                <style>
+                #pos-output-html .pos-highlight {
+                    white-space: pre-wrap;
+                    line-height: 1.8;
+                    font-size: 14px;
+                    padding: 15px;
+                    border: 1px solid #e0e0e0;
+                    border-radius: 4px;
+                    background: #f9f9f9;
+                }
+                #pos-output-html .pos-token {
+                    display: inline-block;
+                    margin: 0 2px 4px 0;
+                    vertical-align: top;
+                    text-align: center;
+                }
+                #pos-output-html .token-text {
+                    display: block;
+                    padding: 2px 8px;
+                    background: #f0f4f8;
+                    border-radius: 4px 4px 0 0;
+                    border: 1px solid #dbe4ed;
+                    border-bottom: none;
+                    font-size: 0.9em;
+                }
+                #pos-output-html .pos-tag {
+                    display: block;
+                    padding: 2px 8px;
+                    border-radius: 0 0 4px 4px;
+                    font-size: 0.8em;
+                    font-family: 'Courier New', monospace;
+                    border: 1px solid;
+                    border-top: none;
+                }
+                /* Color coding for common POS tags */
+                #pos-output-html .NOUN { background-color: #e3f2fd; border-color: #bbdefb; color: #0d47a1; }
+                #pos-output-html .VERB { background-color: #e8f5e9; border-color: #c8e6c9; color: #1b5e20; }
+                #pos-output-html .ADJ { background-color: #fff8e1; border-color: #ffecb3; color: #ff6f00; }
+                #pos-output-html .ADV { background-color: #f3e5f5; border-color: #e1bee7; color: #4a148c; }
+                #pos-output-html .PRON { background-color: #e8eaf6; border-color: #c5cae9; color: #1a237e; }
+                #pos-output-html .DET { background-color: #e0f7fa; border-color: #b2ebf2; color: #006064; }
+                #pos-output-html .ADP { background-color: #f1f8e9; border-color: #dcedc8; color: #33691e; }
+                #pos-output-html .CONJ, #pos-output-html .CCONJ, #pos-output-html .SCONJ { background-color: #fce4ec; border-color: #f8bbd0; color: #880e4f; }
+                #pos-output-html .NUM { background-color: #e8f5e9; border-color: #c8e6c9; color: #1b5e20; font-weight: bold; }
+                #pos-output-html .PUNCT { background-color: #f5f5f5; border-color: #e0e0e0; color: #424242; }
+                #pos-output-html .X, #pos-output-html .SYM { background-color: #fafafa; border-color: #f5f5f5; color: #616161; }
+                #pos-output-html .PROPN { background-color: #e1f5fe; border-color: #b3e5fc; color: #01579b; font-weight: bold; }
+                #pos-output-html .AUX { background-color: #f3e5f5; border-color: #e1bee7; color: #6a1b9a; }
+                #pos-output-html .PART { background-color: #f1f8e9; border-color: #dcedc8; color: #33691e; font-style: italic; }
+                #pos-output-html .INTJ { background-color: #fff3e0; border-color: #ffe0b2; color: #e65100; }
+                </style>
+                """)
+    def format_pos_result(result, selected_tags=None):
+        import html
+        if not result or "tokens" not in result or "tags" not in result:
+            return "<div style='text-align: center; color: #666; padding: 20px;'>No POS tags found or invalid result format.</div>", pd.DataFrame(columns=["Token", "POS Tag"])
+        if selected_tags is None:
+            selected_tags = list(POS_TAG_DESCRIPTIONS.keys())
+        pos_colors = {
+            "NOUN": "#e3f2fd", "VERB": "#e8f5e9", "ADJ": "#fff8e1",
+            "ADV": "#f3e5f5", "PRON": "#e8eaf6", "DET": "#e0f7fa",
+            "ADP": "#f1f8e9", "CONJ": "#fce4ec", "CCONJ": "#fce4ec",
+            "SCONJ": "#fce4ec", "NUM": "#e8f5e9", "PUNCT": "#f5f5f5",
+            "X": "#fafafa", "SYM": "#fafafa", "PROPN": "#e1f5fe",
+            "AUX": "#f3e5f5", "PART": "#f1f8e9", "INTJ": "#fff3e0"
+        }
+        html_parts = ['<div style="line-height:1.6;padding:15px;border:1px solid #e0e0e0;border-radius:4px;background:#f9f9f9;white-space:pre-wrap;">']
+        df_data = []
+        for word, tag in zip(result["tokens"], result["tags"]):
+            clean_tag = tag.split('-')[0].split('_')[0].upper()
+            if clean_tag not in STANDARD_POS_TAGS:
+                clean_tag = "X"
+            df_data.append({"Token": word, "POS Tag": clean_tag})
+            if clean_tag not in selected_tags:
+                html_parts.append(f'{html.escape(word)} ')
+                continue
+            color = pos_colors.get(clean_tag, "#f0f0f0")
+            html_parts.append(f'<span style="background:{color};border-radius:3px;padding:0 2px;margin:0 1px;border:1px solid rgba(0,0,0,0.1);">')
+            html_parts.append(f'{html.escape(word)} ')
+            html_parts.append(f'<span style="font-size:0.7em;font-weight:bold;color:#555;border-radius:2px;padding:0 2px;background:rgba(255,255,255,0.7);">{clean_tag}</span>')
+            html_parts.append('</span>')
+        html_parts.append('</div>')
+        import pandas as pd
+        df = pd.DataFrame(df_data)
+        if selected_tags is not None:
+            df = df[df["POS Tag"].isin(selected_tags)]
+        df = df.reset_index(drop=True)
+        return "".join(html_parts), df
+    def process_pos(text: str, model: str, custom_instructions: str, selected_tags: list):
+        if not text.strip():
+            return [
+                gr.HTML("<div style='color: #f44336; padding: 20px;'>Please enter some text to analyze.</div>", visible=True),
+                gr.HTML(visible=False),  # no_results_html
+                gr.DataFrame(visible=False),  # output_table
+                gr.HTML(visible=False)   # no_results_table
+            ]
+        use_llm = is_llm_model(model)
+        if not selected_tags:
+            selected_tags = list(POS_TAG_DESCRIPTIONS.keys())
+        try:
+            yield [
+                gr.HTML("<div class='pos-highlight'>Processing... This may take a moment for large texts.</div>", visible=True),
+                gr.HTML(visible=False),  # no_results_html
+                gr.DataFrame(visible=False),  # output_table
+                gr.HTML(visible=False)   # no_results_table
+            ]
+            result = pos_tagging(
+                text=text,
+                model=model,
+                custom_instructions=custom_instructions if use_llm else "",
+                use_llm=use_llm
+            )
+            if "error" in result:
+                error_msg = result['error']
+                if "API key" in error_msg or "authentication" in error_msg.lower():
+                    error_msg += " Please check your API key configuration."
+                yield [
+                    gr.HTML(f"<div style='color: #d32f2f; padding: 20px;'>{error_msg}</div>", visible=True),
+                    gr.HTML(visible=False),  # no_results_html
+                    gr.DataFrame(visible=False),  # output_table
+                    gr.HTML(visible=False)   # no_results_table
+                ]
+                return
+            html, table = format_pos_result(result, selected_tags)
+            if not table.empty:
+                yield [
+                    gr.HTML(html, visible=True),  # output_html
+                    gr.HTML(visible=False),         # no_results_html
+                    gr.DataFrame(value=table, visible=True),  # output_table
+                    gr.HTML(visible=False)          # no_results_table
+                ]
+            else:
+                empty_msg = "<div class='pos-highlight' style='text-align: center; color: #666; padding: 20px;'>No POS tags could be extracted from the text.</div>"
+                yield [
+                    gr.HTML(empty_msg, visible=True),  # output_html
+                    gr.HTML(visible=False),           # no_results_html
+                    gr.DataFrame(visible=False),       # output_table
+                    gr.HTML(empty_msg, visible=True)   # no_results_table
+                ]
+        except Exception as e:
+            import traceback
+            error_msg = f"Error processing request: {str(e)}\n\n{traceback.format_exc()}"
+            print(error_msg)  # Log the full error
+            yield [
+                gr.HTML("<div class='pos-highlight' style='color: #d32f2f; padding: 20px;'>An error occurred while processing your request. Please try again.</div>", visible=True),
+                gr.HTML(visible=False),  # no_results_html
+                gr.DataFrame(visible=False),  # output_table
+                gr.HTML(visible=False)   # no_results_table
+            ]
+    def update_ui(model_name: str) -> Dict:
+        use_llm = is_llm_model(model_name)
+        return {
+            custom_instructions: gr.Textbox(visible=use_llm)
+        }
+    def clear_inputs():
+        return "", "", ""
+    model_dropdown.change(
+        fn=update_ui,
+        inputs=[model_dropdown],
+        outputs=[custom_instructions]
+    )
+    submit_btn.click(
+        fn=process_pos,
+        inputs=[input_text, model_dropdown, custom_instructions, tag_selection],
+        outputs=[output_html, no_results_html, output_table, no_results_table],
+        show_progress=True
+    )
+    gr.HTML("""
+    <style>
+    /* Style for the tabs */
+    #tagged-view, #table-view {
+        padding: 15px;
+    }
+    /* Make the tabs more visible */
+    .tab-nav {
+        margin-bottom: 10px;
+        border-bottom: 1px solid #e0e0e0;
+    }
+    .tab-nav button {
+        padding: 8px 16px;
+        margin-right: 5px;
+        border: 1px solid #e0e0e0;
+        background: #f5f5f5;
+        border-radius: 4px 4px 0 0;
+        cursor: pointer;
+    }
+    .tab-nav button.selected {
+        background: #ffffff;
+        border-bottom: 2px solid #0e7490;
+        font-weight: bold;
+    }
+    </style>
+    """)
+    custom_instructions.visible = is_llm_model(DEFAULT_MODEL)
+    return None

ui/sentiment_ui.py ADDED Viewed

	@@ -0,0 +1,108 @@

+import gradio as gr
+from utils.ner_helpers import is_llm_model
+from typing import Dict, List, Any
+from tasks.sentiment_analysis import sentiment_analysis
+def sentiment_ui():
+    """Sentiment analysis UI component"""
+    # Define models
+    SENTIMENT_MODELS = [
+        "gemini-2.0-flash"  # Only allow gemini-2.0-flash for now
+        # "gpt-4",
+        # "claude-2",
+        # "distilbert-base-uncased-finetuned-sst-2-english",
+        # "finiteautomata/bertweet-base-sentiment-analysis"
+    ]
+    DEFAULT_MODEL = "gemini-2.0-flash"
+    def analyze_sentiment(text, model, custom_instructions):
+        """Process text for sentiment analysis"""
+        if not text.strip():
+            return "No text provided"
+        use_llm = is_llm_model(model)
+        result = sentiment_analysis(
+            text=text,
+            model=model,
+            custom_instructions=custom_instructions,
+            use_llm=use_llm
+        )
+        # Try to normalize the result
+        result = result.lower().strip()
+        if "positive" in result:
+            return "Positive"
+        elif "negative" in result:
+            return "Negative"
+        elif "neutral" in result:
+            return "Neutral"
+        else:
+            # Return as is for other results
+            return result
+    # UI Components
+    with gr.Row():
+        with gr.Column():
+            input_text = gr.Textbox(
+                label="Input Text",
+                lines=6,
+                placeholder="Enter text to analyze sentiment...",
+                elem_id="sentiment-input-text"
+            )
+            gr.Examples(
+                examples=[
+                    ["I am very satisfied with the customer service of this company."],
+                    ["The product did not meet my expectations and I am disappointed."]
+                ],
+                inputs=[input_text],
+                label="Examples"
+            )
+            model = gr.Dropdown(
+                SENTIMENT_MODELS,
+                value=DEFAULT_MODEL,
+                label="Model",
+                interactive=True,
+                elem_id="sentiment-model-dropdown"
+            )
+            custom_instructions = gr.Textbox(
+                label="Custom Instructions (optional)",
+                lines=2,
+                placeholder="Add any custom instructions for the model...",
+                elem_id="sentiment-custom-instructions"
+            )
+            btn = gr.Button("Analyze Sentiment", variant="primary", elem_id="sentiment-analyze-btn")
+        with gr.Column():
+            output = gr.Textbox(
+                label="Sentiment Analysis",
+                elem_id="sentiment-output"
+            )
+            # with gr.Accordion("About Sentiment Analysis", open=False):
+            #     gr.Markdown("""
+            #     ## Sentiment Analysis
+            #     Sentiment analysis identifies the emotional tone behind text. The model analyzes your input text and classifies it as:
+            #     - **Positive**: Text expresses positive emotions, approval, or optimism
+            #     - **Negative**: Text expresses negative emotions, criticism, or pessimism
+            #     - **Neutral**: Text is factual or does not express strong sentiment
+            #     ### Model Types
+            #     - **LLM Models** (Gemini, GPT, Claude): Provide sophisticated analysis with better understanding of context
+            #     - **Traditional Models**: Specialized models trained specifically for sentiment analysis tasks
+            #     Use the advanced options to customize how the model analyzes your text.
+            #     """)
+    # Event handlers
+    btn.click(
+        analyze_sentiment,
+        inputs=[input_text, model, custom_instructions],
+        outputs=output
+    )
+    return None

ui/summarization_ui.py ADDED Viewed

	@@ -0,0 +1,101 @@

+import gradio as gr
+from utils.ner_helpers import is_llm_model
+from typing import Dict, List, Any
+from tasks.summarization import text_summarization
+def summarization_ui():
+    """Summarization UI component"""
+    # Define models
+    SUMMARY_MODELS = [
+        "gemini-2.0-flash"  # Only allow gemini-2.0-flash for now
+        # "gpt-4",
+        # "claude-2",
+        # "facebook/bart-large-cnn",
+        # "t5-small",
+        # "qwen/Qwen2.5-3B-Instruct"
+    ]
+    DEFAULT_MODEL = "gemini-2.0-flash"
+    def summarize(text, model, summary_length, custom_instructions):
+        """Process text for summarization"""
+        if not text.strip():
+            return "No text provided"
+        use_llm = is_llm_model(model)
+        result = text_summarization(
+            text=text,
+            model=model,
+            summary_length=summary_length,
+            use_llm=use_llm
+        )
+        # Lưu ý: custom_instructions sẽ được sử dụng trong tương lai khi API hỗ trợ
+        return result
+    # UI Components
+    with gr.Row():
+        with gr.Column():
+            input_text = gr.Textbox(
+                label="Input Text",
+                lines=8,
+                placeholder="Enter text to summarize...",
+                elem_id="summary-input-text"
+            )
+            summary_length = gr.Radio(
+                ["Short", "Medium", "Long"],
+                value="Medium",
+                label="Summary Length",
+                elem_id="summary-length-radio"
+            )
+            model = gr.Dropdown(
+                SUMMARY_MODELS,
+                value=DEFAULT_MODEL,
+                label="Model",
+                interactive=True,
+                elem_id="summary-model-dropdown"
+            )
+            custom_instructions = gr.Textbox(
+                label="Custom Instructions (optional)",
+                lines=2,
+                placeholder="Add any custom instructions for the model...",
+                elem_id="summary-custom-instructions"
+            )
+            btn = gr.Button("Summarize", variant="primary", elem_id="summary-btn")
+        with gr.Column():
+            output = gr.Textbox(
+                label="Summary",
+                lines=10,
+                elem_id="summary-output"
+            )
+            # with gr.Accordion("About Summarization", open=False):
+            #     gr.Markdown("""
+            #     ## Text Summarization
+            #     Text summarization condenses a document while preserving key information. This tool offers:
+            #     - **Length control**: Choose between short, medium, or long summaries
+            #     - **Multiple models**: Select from LLMs (like Gemini and GPT) or traditional models
+            #     - **Custom instructions**: Tailor the summarization to your specific needs
+            #     ### How it works
+            #     - **LLM models** process your text using natural language understanding
+            #     - **Traditional models** use extractive or abstractive techniques to identify and condense key information
+            #     For best results with long texts, try different summary lengths to find the right balance between brevity and detail.
+            #     """)
+    # Event handlers
+    btn.click(
+        summarize,
+        inputs=[input_text, model, summary_length, custom_instructions],
+        outputs=output
+    )
+    return None

ui/topic_ui.py ADDED Viewed

	@@ -0,0 +1,108 @@

+import gradio as gr
+from utils.ner_helpers import is_llm_model
+from typing import Dict, List, Any
+from tasks.topic_classification import topic_classification
+def topic_ui():
+    """Topic classification UI component"""
+    # Define models and default labels
+    TOPIC_MODELS = [
+        "gemini-2.0-flash"  # Only allow gemini-2.0-flash for now
+        # "gpt-4",
+        # "claude-2",
+        # "facebook/bart-large-mnli",
+        # "joeddav/xlm-roberta-large-xnli"
+    ]
+    DEFAULT_MODEL = "gemini-2.0-flash"
+    DEFAULT_LABELS = [
+        "Sports", "Economy", "Politics", "Entertainment", "Technology", "Education", "Law"
+    ]
+    def classify(text, model, use_custom, labels, custom_instructions):
+        """Process text for topic classification"""
+        if not text.strip():
+            return "No text provided"
+        use_llm = is_llm_model(model)
+        label_list = [l.strip() for l in labels.split('\n') if l.strip()] if use_custom else None
+        if use_custom and (not label_list or len(label_list) == 0):
+            return "Please provide at least one category"
+        result = topic_classification(
+            text=text,
+            model=model,
+            candidate_labels=label_list,
+            custom_instructions=custom_instructions,
+            use_llm=use_llm
+        )
+        return result.strip()
+    # UI Components
+    with gr.Row():
+        with gr.Column():
+            input_text = gr.Textbox(
+                label="Input Text",
+                lines=6,
+                placeholder="Enter text to classify...",
+                elem_id="topic-input-text"
+            )
+            gr.Examples(
+                examples=[
+                    ["Apple has announced the release of a new iPhone model this fall."],
+                    ["The United Nations held a climate summit to discuss global warming solutions."]
+                ],
+                inputs=[input_text],
+                label="Examples"
+            )
+            use_custom_topics = gr.Checkbox(
+                label="Use custom topics",
+                value=True,
+                elem_id="topic-use-custom-topics"
+            )
+            topics_area = gr.TextArea(
+                label="Candidate Topics (one per line)",
+                value='\n'.join(DEFAULT_LABELS),
+                lines=5,
+                visible=True,
+                elem_id="topic-candidate-topics"
+            )
+            def toggle_topics_area(use_custom):
+                return gr.update(visible=use_custom)
+            use_custom_topics.change(toggle_topics_area, inputs=use_custom_topics, outputs=topics_area)
+            model = gr.Dropdown(
+                TOPIC_MODELS,
+                value=DEFAULT_MODEL,
+                label="Model",
+                interactive=True,
+                elem_id="topic-model-dropdown"
+            )
+            custom_instructions = gr.Textbox(
+                label="Custom Instructions (optional)",
+                lines=2,
+                placeholder="Add any custom instructions for the model...",
+                elem_id="topic-custom-instructions"
+            )
+            classify_btn = gr.Button("Classify Topic", variant="primary", elem_id="topic-classify-btn")
+        with gr.Column():
+            output_box = gr.Textbox(
+                label="Classification Result",
+                lines=2,
+                elem_id="topic-output"
+            )
+        def run_topic_classification(text, model, use_custom, topics, custom_instructions):
+            return classify(text, model, use_custom, topics, custom_instructions)
+        classify_btn.click(
+            run_topic_classification,
+            inputs=[input_text, model, use_custom_topics, topics_area, custom_instructions],
+            outputs=output_box
+        )
+            #     4. Click "Classify" to analyze
+            #     ### Model Types
+            #     - **LLM Models** (Gemini, GPT, Claude): Provide sophisticated classification with better understanding of context and nuance
+            #     - **Traditional Models**: Specialized models trained specifically for zero-shot classification tasks
+            #     Use the advanced options to customize how the model classifies your text.
+            #     """)
+    return None

ui/translation_ui.py ADDED Viewed

	@@ -0,0 +1,122 @@

+import gradio as gr
+from utils.ner_helpers import is_llm_model
+from typing import Dict, List, Any
+from tasks.translation import text_translation
+def translation_ui():
+    """Translation UI component"""
+    # Define models
+    TRANSLATION_MODELS = [
+        "gemini-2.0-flash"  # Only allow gemini-2.0-flash for now
+        # "gpt-4",
+        # "claude-2",
+        # "Helsinki-NLP/opus-mt-en-vi",
+        # "Helsinki-NLP/opus-mt-vi-en"
+    ]
+    DEFAULT_MODEL = "gemini-2.0-flash"
+    def translate(text, model, src_lang, tgt_lang, custom_instructions):
+        """Process text for translation"""
+        if not text.strip():
+            return "No text provided"
+        use_llm = is_llm_model(model)
+        result = text_translation(
+            text=text,
+            model=model,
+            src_lang=src_lang,
+            tgt_lang=tgt_lang,
+            custom_instructions=custom_instructions,
+            use_llm=use_llm
+        )
+        return result
+    # UI Components
+    with gr.Row():
+        with gr.Column():
+            input_text = gr.Textbox(
+                label="Input Text",
+                lines=8,
+                placeholder="Enter text to translate...",
+                elem_id="translation-input-text"
+            )
+            gr.Examples(
+                examples=[
+                    ["Vietnam's economy has grown rapidly in the past decade."],
+                    ["The football match between Manchester United and Chelsea was very exciting."]
+                ],
+                inputs=[input_text],
+                label="Examples"
+            )
+            with gr.Row():
+                pass
+            src_lang = gr.Textbox(
+                label="Source Language (e.g., en, vi, ja)",
+                value="en",
+                elem_id="translation-src-lang"
+            )
+            tgt_lang = gr.Textbox(
+                label="Target Language (e.g., en, vi, ja)",
+                value="vi",
+                elem_id="translation-tgt-lang"
+            )
+            model = gr.Dropdown(
+                TRANSLATION_MODELS,
+                value=DEFAULT_MODEL,
+                label="Model",
+                interactive=True,
+                elem_id="translation-model-dropdown"
+            )
+            custom_instructions = gr.Textbox(
+                label="Custom Instructions (optional)",
+                lines=2,
+                placeholder="Add any custom instructions for the model...",
+                elem_id="translation-custom-instructions"
+            )
+            btn = gr.Button("Translate", variant="primary", elem_id="translation-btn")
+        with gr.Column():
+            output = gr.Textbox(
+                label="Translation",
+                lines=10,
+                elem_id="translation-output"
+            )
+            # with gr.Accordion("About Translation", open=False):
+            #     gr.Markdown("""
+            #     ## Text Translation
+            #     Text translation converts text from one language to another. This tool offers:
+            #     - **Multiple languages**: Translate between any language pair
+            #     - **Multiple models**: Select from LLMs (like Gemini and GPT) or specialized translation models
+            #     - **Custom instructions**: Tailor the translation to specific domains or styles
+            #     ### Language Codes
+            #     Use standard language codes like:
+            #     - `en` for English
+            #     - `vi` for Vietnamese
+            #     - `ja` for Japanese
+            #     - `fr` for French
+            #     - `es` for Spanish
+            #     - `ko` for Korean
+            #     ### Tips
+            #     - LLM models perform better on complex or nuanced translations
+            #     - Specialized models might be faster for common language pairs
+            #     - Use custom instructions to specify tones (formal/informal) or domains (technical/literary)
+            #     """)
+    # Event handlers
+    btn.click(
+        translate,
+        inputs=[input_text, model, src_lang, tgt_lang, custom_instructions],
+        outputs=output
+    )
+    return None

utils/ner_helpers.py ADDED Viewed

	@@ -0,0 +1,88 @@

+# NER helpers and constants
+from typing import List
+# Standard NER entity types with descriptions
+NER_ENTITY_TYPES = {
+    "PERSON": "People, including fictional",
+    "ORG": "Companies, agencies, institutions, etc.",
+    "GPE": "Countries, cities, states",
+    "LOC": "Non-GPE locations, mountain ranges, bodies of water",
+    "PRODUCT": "Objects, vehicles, foods, etc. (not services)",
+    "EVENT": "Named hurricanes, battles, wars, sports events, etc.",
+    "WORK_OF_ART": "Titles of books, songs, etc.",
+    "LAW": "Named documents made into laws",
+    "LANGUAGE": "Any named language",
+    "DATE": "Absolute or relative dates or periods",
+    "TIME": "Times smaller than a day",
+    "PERCENT": "Percentage (including '%')",
+    "MONEY": "Monetary values, including unit",
+    "QUANTITY": "Measurements, as of weight or distance",
+    "ORDINAL": "'first', 'second', etc.",
+    "CARDINAL": "Numerals that do not fall under another type",
+    "NORP": "Nationalities or religious or political groups",
+    "FAC": "Buildings, airports, highways, bridges, etc.",
+    "PRODUCT": "Objects, vehicles, foods, etc. (not services)",
+    "EVENT": "Named hurricanes, battles, wars, sports events, etc.",
+    "WORK_OF_ART": "Titles of books, songs, etc.",
+    "LAW": "Named documents made into laws",
+    "LANGUAGE": "Any named language"
+}
+# Default selected entity types (first 5 by default)
+DEFAULT_SELECTED_ENTITIES = list(NER_ENTITY_TYPES.keys())[:5]
+LLM_MODELS = ["gemini", "gpt", "claude"]
+def is_llm_model(model_id: str) -> bool:
+    """Check if the model is an LLM-based model."""
+    return any(llm_model in model_id.lower() for llm_model in LLM_MODELS)
+# Render NER HTML for tagged view
+def render_ner_html(text, entities, selected_entity_types=None):
+    import html as html_lib
+    import re
+    if not text.strip() or not entities:
+        return "<div style='text-align: center; color: #666; padding: 20px;'>No named entities found in the text.</div>"
+    if selected_entity_types is None:
+        selected_entity_types = list(NER_ENTITY_TYPES.keys())
+    COLORS = [
+        '#e3f2fd', '#e8f5e9', '#fff8e1', '#f3e5f5', '#e8eaf6', '#e0f7fa',
+        '#f1f8e9', '#fce4ec', '#f5f5f5', '#fafafa', '#e1f5fe', '#f3e5f5', '#f1f8e9'
+    ]
+    # Sort and filter entities by start position and selected types
+    entities = sorted(entities, key=lambda e: e.get('start', 0))
+    non_overlapping = []
+    for e in entities:
+        if e.get('type', '') in selected_entity_types or e.get('entity', '') in selected_entity_types:
+            if not non_overlapping or e['start'] >= non_overlapping[-1]['end']:
+                label = e.get('type', e.get('entity', ''))
+                color = COLORS[hash(label) % len(COLORS)]
+                non_overlapping.append({
+                    'start': e['start'],
+                    'end': e['end'],
+                    'label': label,
+                    'text': e.get('word', e.get('text', '')),
+                    'color': color
+                })
+    filtered_entities = [entity for entity in non_overlapping if entity['label'] in selected_entity_types]
+    html = ["<div class='ner-highlight' style='line-height:1.6;padding:15px;border:1px solid #e0e0e0;border-radius:4px;background:#f9f9f9;white-space:pre-wrap;'>"]
+    if not filtered_entities:
+        html.append("<div style='text-align: center; color: #666; padding: 20px;'>")
+        html.append("No entities of the selected types found in the text.")
+        html.append("</div>")
+    else:
+        last_pos = 0
+        for entity in filtered_entities:
+            start = entity['start']
+            end = entity['end']
+            if start > last_pos:
+                html.append(html_lib.escape(text[last_pos:start]))
+            html.append(f"<span style='background:{entity['color']};border-radius:3px;padding:2px 4px;margin:0 1px;border:1px solid rgba(0,0,0,0.1);'>")
+            html.append(f"{html_lib.escape(entity['text'])} ")
+            html.append(f"<span style='font-size:0.8em;font-weight:bold;color:#555;border-radius:2px;padding:0 2px;background:rgba(255,255,255,0.7);'>{html_lib.escape(entity['label'])}</span>")
+            html.append("</span>")
+            last_pos = end
+    if last_pos < len(text):
+        html.append(html_lib.escape(text[last_pos:]))
+    html.append("</div>")
+    return "".join(html)

utils/pos_helpers.py ADDED Viewed

	@@ -0,0 +1,38 @@

+# POS helpers and constants
+# Models offered for POS tagging; the commented-out entries are currently disabled.
+POS_MODELS = [
+    "gemini-2.0-flash"  # Only allow gemini-2.0-flash for now
+    # "gpt-4",
+    # "claude-2",
+    # "vblagoje/bert-english-uncased-finetuned-pos",
+    # "QCRI/bert-base-multilingual-cased-pos-english"
+]
+DEFAULT_MODEL = "gemini-2.0-flash"
+# Tags accepted as-is.
+# NOTE(review): "CONJ" is listed here but has no entry in POS_TAG_DESCRIPTIONS,
+# so it is absent from DEFAULT_SELECTED_TAGS - confirm whether that is intended.
+STANDARD_POS_TAGS = [
+    "ADJ", "ADP", "ADV", "AUX", "CONJ", "CCONJ", "DET", "INTJ", "NOUN",
+    "NUM", "PART", "PRON", "PROPN", "PUNCT", "SCONJ", "SYM", "VERB", "X"
+]
+# Human-readable description, with examples, for each tag.
+POS_TAG_DESCRIPTIONS = {
+    "ADJ": "Adjective (big, old, green, interesting)",
+    "ADP": "Adposition (in, to, during)",
+    "ADV": "Adverb (very, well, there, tomorrow)",
+    "AUX": "Auxiliary verb (is, has (done), will (do), should (do))",
+    "CCONJ": "Coordinating conjunction (and, or, but)",
+    "DET": "Determiner (a, an, the, this, those)",
+    "INTJ": "Interjection (oh, hey, oops, hmm)",
+    "NOUN": "Noun (dog, cat, man, house, idea)",
+    "NUM": "Numeral (one, two, 3, 55, 2019)",
+    "PART": "Particle (not, 's, let's)",
+    "PRON": "Pronoun (I, you, he, she, it, we, they, me, him, her, us, them)",
+    "PROPN": "Proper noun (John, Mary, London, Microsoft)",
+    "PUNCT": "Punctuation (.,!?;:)",
+    "SCONJ": "Subordinating conjunction (if, because, as, that)",
+    "SYM": "Symbol (%, $, §, ©)",
+    "VERB": "Verb (run, runs, running, eat, ate, eaten)",
+    "X": "Other (foreign words, typos, etc.)"
+}
+# Every described tag is selected by default.
+DEFAULT_SELECTED_TAGS = list(POS_TAG_DESCRIPTIONS.keys())

utils/remote_client.py ADDED Viewed

	@@ -0,0 +1,42 @@

+import os
+import requests
+from typing import Dict, Any
+from dotenv import load_dotenv
+load_dotenv()
+# Timeout in seconds, read from the REMOTE_SERVICE_TIMEOUT environment
+# variable and defaulting to 300 when it is unset.
+TIMEOUT = int(os.getenv("REMOTE_SERVICE_TIMEOUT", "300"))
+def execute_remote_task(task_name: str, payload: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    Execute a remote task using the configured remote service.
+    Args:
+        task_name: Name of the task to execute (e.g., 'summarization', 'translation')
+        payload: Dictionary containing task-specific parameters
+    Returns:
+        Dictionary containing the task result
+    Raises:
+        requests.RequestException: If there's an error with the remote request
+    """
+    # Get the endpoint from environment variables
+    endpoint = os.getenv(f"REMOTE_ENDPOINT_{task_name.upper()}")
+    if not endpoint:
+        raise ValueError(f"No endpoint configured for task: {task_name}")
+    try:
+        response = requests.post(
+            url=endpoint,
+            json={"task": task_name, **payload},
+            timeout=TIMEOUT
+        )
+        response.raise_for_status()
+        return response.json()
+    except requests.RequestException as e:
+        error_msg = f"Error calling remote service for {task_name}: {str(e)}"
+        if hasattr(e, 'response') and e.response is not None:
+            error_msg += f" - {e.response.text}"
+        return {"error": error_msg}

utils/shared.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ # Shared helpers and utilities