# Page-header residue from the hosting site (not part of the program):
#   IsmatS's picture / "Add application file" / 9d445eb
import html

import gradio as gr
from transformers import pipeline, AutoTokenizer, AutoModelForTokenClassification
# Checkpoint identifier passed to both ``from_pretrained`` calls below.
model_name = "IsmatS/xlm-roberta-az-ner"

# Tokenizer and token-classification model are loaded once at import time.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForTokenClassification.from_pretrained(model_name)

# NER pipeline shared by every request; "simple" aggregation yields grouped
# results exposing the "entity_group", "word" and "score" keys used later.
nlp_ner = pipeline(
    "ner",
    model=model,
    tokenizer=tokenizer,
    aggregation_strategy="simple",
)
# Maps the generic "LABEL_<n>" ids looked up by process_text to readable
# entity-type names (the original note says this mirrors main.py).
# The position of each name in the tuple is its label index.
label_mapping = {
    f"LABEL_{index}": readable_name
    for index, readable_name in enumerate(
        (
            "Other",
            "Person",
            "Location",
            "Organization",
            "Date",
            "Time",
            "Money",
            "Percentage",
            "Facility",
            "Product",
            "Event",
            "Art",
            "Law",
            "Language",
            "Government",
            "Nationality or Religion",
            "Ordinal",
            "Cardinal",
            "Disease",
            "Contact",
            "Proverb or Saying",
            "Quantity",
            "Miscellaneous",
            "Position",
            "Project",
        )
    )
}
def process_text(text):
    """Run NER over *text* and return the detected entities as HTML.

    Each result from ``nlp_ner`` is mapped to a readable type through
    ``label_mapping`` (falling back to the raw group name), results typed
    "Other" are dropped, and the rest are grouped by type in first-seen order.

    Args:
        text: Raw user input. ``None`` and blank strings count as empty.

    Returns:
        A plain message when the input is blank or nothing was detected;
        otherwise an HTML fragment with one ``<h3>`` heading and one ``<ul>``
        list per entity type, each item showing the entity text and its
        score rounded to two decimals.
    """
    # Treat None like blank input instead of failing on ``.strip()``.
    if not text or not text.strip():
        return "Please enter some text to analyze."

    entities_by_type = {}
    for entity in nlp_ner(text):
        group = entity["entity_group"]
        entity_type = label_mapping.get(group, group)
        # "Other" marks non-entities, so it is never displayed.
        if entity_type == "Other":
            continue
        # The entity text originates from user input and ends up in rendered
        # HTML, so escape it to keep markup in the input from being injected.
        word = html.escape(str(entity["word"]))
        entities_by_type.setdefault(entity_type, []).append(
            f"{word} (score: {entity['score']:.2f})"
        )

    if not entities_by_type:
        return "No entities detected in the provided text."

    # Collect fragments and join once rather than growing a string with +=.
    parts = ["<div style='text-align:left;'>"]
    for entity_type, entities in entities_by_type.items():
        parts.append(f"<h3>{html.escape(str(entity_type))}</h3>")
        parts.append("<ul>")
        parts.extend(f"<li>{item}</li>" for item in entities)
        parts.append("</ul>")
    parts.append("</div>")
    return "".join(parts)
# Sample sentences offered in the UI; the first one also pre-fills the input.
_SAMPLE_SENTENCES = (
    "2014 - cu ilde Azərbaycan Respublikasının prezidenti İlham Əliyev Salyanda olub.",
    "Bakı şəhərində Azərbaycan Respublikasının prezidenti İlham Əliyev.",
    "Apple şirkəti 24 Sentyabr tarixində iPhone 15 modelini təqdim etdi.",
)

# Single-function interface: a multi-line text box feeds process_text and the
# returned markup is shown through an HTML component.
demo = gr.Interface(
    fn=process_text,
    inputs=gr.Textbox(
        lines=5,
        placeholder="Enter Azerbaijani text here...",
        value=_SAMPLE_SENTENCES[0],
    ),
    outputs=gr.HTML(),
    title="Azerbaijani Named Entity Recognition",
    description="Identify named entities such as persons, locations, organizations, and dates in Azerbaijani text.",
    # One single-element row per sample sentence.
    examples=[[sentence] for sentence in _SAMPLE_SENTENCES],
    theme=gr.themes.Soft(),
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()