Spaces:

IsmatS
/

azerbaijani-ner-demo

Sleeping

App Files Files Community

azerbaijani-ner-demo / app.py

IsmatS

Add application file

9d445eb about 2 months ago

raw

history blame contribute delete

3.38 kB

	import gradio as gr
	from transformers import pipeline, AutoTokenizer, AutoModelForTokenClassification

	# Load the tokenizer and token-classification weights identified by model_name.
	model_name = "IsmatS/xlm-roberta-az-ner"
	tokenizer = AutoTokenizer.from_pretrained(model_name)
	model = AutoModelForTokenClassification.from_pretrained(model_name)

	# Shared NER pipeline; process_text reads the "entity_group", "word" and
	# "score" fields of every result it produces.
	nlp_ner = pipeline(
	    task="ner",
	    model=model,
	    tokenizer=tokenizer,
	    aggregation_strategy="simple",
	)

	# Human-readable names for the model's generic "LABEL_<n>" ids. The position
	# of each name in the tuple is its label id (the original note says this
	# mirrors the mapping used in main.py).
	label_mapping = {
	    f"LABEL_{index}": name
	    for index, name in enumerate((
	        "Other",
	        "Person",
	        "Location",
	        "Organization",
	        "Date",
	        "Time",
	        "Money",
	        "Percentage",
	        "Facility",
	        "Product",
	        "Event",
	        "Art",
	        "Law",
	        "Language",
	        "Government",
	        "Nationality or Religion",
	        "Ordinal",
	        "Cardinal",
	        "Disease",
	        "Contact",
	        "Proverb or Saying",
	        "Quantity",
	        "Miscellaneous",
	        "Position",
	        "Project",
	    ))
	}

	def process_text(text):
	    """Run NER over ``text`` and return the detected entities as HTML.

	    Each result from ``nlp_ner`` is grouped under its human-readable type
	    (looked up in ``label_mapping``; labels missing from the mapping are
	    shown unchanged) and listed with its score rounded to two decimals.
	    Results whose type resolves to "Other" are skipped.

	    Args:
	        text: Raw user input from the UI. ``None``, empty and
	            whitespace-only values are all treated as "no input".

	    Returns:
	        An HTML fragment with one heading and bullet list per entity type,
	        or a plain message when there is no input or nothing was detected.
	    """
	    # Function-scope import so this block does not depend on the file header.
	    import html

	    # ``not text`` also covers None, which would otherwise crash on .strip().
	    if not text or not text.strip():
	        return "Please enter some text to analyze."

	    # Group formatted entity strings by type, preserving first-seen order.
	    entities_by_type = {}
	    for entity in nlp_ner(text):
	        group = entity["entity_group"]
	        entity_type = label_mapping.get(group, group)

	        # "Other" marks non-entities; they are not shown to the user.
	        if entity_type == "Other":
	            continue

	        # The word originates from user-supplied text and ends up inside an
	        # HTML component, so it must be escaped before interpolation.
	        word = html.escape(str(entity["word"]))
	        entities_by_type.setdefault(entity_type, []).append(
	            f"{word} (score: {entity['score']:.2f})"
	        )

	    if not entities_by_type:
	        return "No entities detected in the provided text."

	    # Collect fragments and join once instead of repeated string +=.
	    parts = ["<div style='text-align:left;'>"]
	    for entity_type, entities in entities_by_type.items():
	        # Unmapped labels come straight from the model config; escape too.
	        parts.append(f"<h3>{html.escape(str(entity_type))}</h3>")
	        parts.append("<ul>")
	        parts.extend(f"<li>{item}</li>" for item in entities)
	        parts.append("</ul>")
	    parts.append("</div>")

	    return "".join(parts)

	# Sample sentences offered in the UI; the first one also pre-fills the input.
	_example_texts = [
	    "2014 - cu ilde Azərbaycan Respublikasının prezidenti İlham Əliyev Salyanda olub.",
	    "Bakı şəhərində Azərbaycan Respublikasının prezidenti İlham Əliyev.",
	    "Apple şirkəti 24 Sentyabr tarixində iPhone 15 modelini təqdim etdi.",
	]

	# Multi-line input box for the text to analyse.
	_text_input = gr.Textbox(
	    lines=5,
	    placeholder="Enter Azerbaijani text here...",
	    value=_example_texts[0],
	)

	# Wire process_text to a text-in / HTML-out Gradio interface.
	demo = gr.Interface(
	    fn=process_text,
	    inputs=_text_input,
	    outputs=gr.HTML(),
	    title="Azerbaijani Named Entity Recognition",
	    description="Identify named entities such as persons, locations, organizations, and dates in Azerbaijani text.",
	    # Interface expects one row (list) of input values per example.
	    examples=[[sample] for sample in _example_texts],
	    theme=gr.themes.Soft(),
	)

	# Start the web app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()