# Spaces: Sleeping  (page-status banner captured with the source; not part of the program)
import html

import gradio as gr
from transformers import pipeline, AutoTokenizer, AutoModelForTokenClassification
# Load the model and tokenizer once at import time so every request handled
# by the app reuses the same pipeline object.
model_name = "IsmatS/xlm-roberta-az-ner"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForTokenClassification.from_pretrained(model_name)

# aggregation_strategy="simple" makes the pipeline return grouped entities;
# process_text reads the "entity_group", "word" and "score" keys of each result.
nlp_ner = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple")
# Map the pipeline's raw "LABEL_<n>" entity groups to human-readable names.
# NOTE(review): the original comment says this mirrors the mapping in main.py;
# the index order is assumed to match the model's label ids — verify against
# the model config before changing any entry.
label_mapping = {
    "LABEL_0": "Other",
    "LABEL_1": "Person",
    "LABEL_2": "Location",
    "LABEL_3": "Organization",
    "LABEL_4": "Date",
    "LABEL_5": "Time",
    "LABEL_6": "Money",
    "LABEL_7": "Percentage",
    "LABEL_8": "Facility",
    "LABEL_9": "Product",
    "LABEL_10": "Event",
    "LABEL_11": "Art",
    "LABEL_12": "Law",
    "LABEL_13": "Language",
    "LABEL_14": "Government",
    "LABEL_15": "Nationality or Religion",
    "LABEL_16": "Ordinal",
    "LABEL_17": "Cardinal",
    "LABEL_18": "Disease",
    "LABEL_19": "Contact",
    "LABEL_20": "Proverb or Saying",
    "LABEL_21": "Quantity",
    "LABEL_22": "Miscellaneous",
    "LABEL_23": "Position",
    "LABEL_24": "Project",
}
def process_text(text):
    """Run NER over *text* and render the detected entities as an HTML fragment.

    Entities are grouped by their human-readable type (looked up in
    ``label_mapping``; groups missing from the mapping keep their raw label),
    in order of first appearance. Entities whose type is "Other" are dropped.

    Args:
        text: Text entered by the user. ``None`` is treated like empty input.

    Returns:
        A plain message string when the input is blank or no entities remain
        after filtering; otherwise an HTML ``<div>`` holding one
        ``<h3>`` heading and ``<ul>`` list per entity type, each item showing
        the entity text and its score rounded to two decimals.
    """
    # Guard against None as well as blank strings before calling .strip().
    if not text or not text.strip():
        return "Please enter some text to analyze."

    entities_by_type = {}
    for entity in nlp_ner(text):
        raw_group = entity["entity_group"]
        entity_type = label_mapping.get(raw_group, raw_group)
        # "Other" marks non-entities; skip them.
        if entity_type == "Other":
            continue
        entities_by_type.setdefault(entity_type, []).append(
            f"{entity['word']} (score: {entity['score']:.2f})"
        )

    if not entities_by_type:
        return "No entities detected in the provided text."

    # The entity text originates from user input and the result is rendered as
    # HTML, so escape everything interpolated into the markup.
    parts = ["<div style='text-align:left;'>"]
    for entity_type, entities in entities_by_type.items():
        parts.append(f"<h3>{html.escape(entity_type)}</h3>")
        parts.append("<ul>")
        parts.extend(f"<li>{html.escape(item)}</li>" for item in entities)
        parts.append("</ul>")
    parts.append("</div>")
    return "".join(parts)
# Create the Gradio interface: a multi-line textbox feeds process_text and the
# returned string is displayed through an HTML output component.
demo = gr.Interface(
    fn=process_text,
    inputs=gr.Textbox(
        lines=5,
        placeholder="Enter Azerbaijani text here...",
        # Pre-filled sample so the app shows a result without any typing.
        value="2014 - cu ilde Azərbaycan Respublikasının prezidenti İlham Əliyev Salyanda olub.",
    ),
    outputs=gr.HTML(),
    title="Azerbaijani Named Entity Recognition",
    description="Identify named entities such as persons, locations, organizations, and dates in Azerbaijani text.",
    # Each example is a one-element list because the interface has one input.
    examples=[
        ["2014 - cu ilde Azərbaycan Respublikasının prezidenti İlham Əliyev Salyanda olub."],
        ["Bakı şəhərində Azərbaycan Respublikasının prezidenti İlham Əliyev."],
        ["Apple şirkəti 24 Sentyabr tarixində iPhone 15 modelini təqdim etdi."],
    ],
    theme=gr.themes.Soft(),
)
# Launch the app only when this file is executed as a script, so importing
# the module does not start a server.
if __name__ == "__main__":
    demo.launch()