Symspell4nepali / main.py
nabin2004's picture
Upload folder using huggingface_hub
45cc894 verified
import re
import gradio as gr
from symspellpy import SymSpell, Verbosity
# ---------------------------
# Simplification function
# ---------------------------
def simplify_devanagari(text):
    """Reduce Devanagari text to the bare skeleton used as a fuzzy-match key.

    Three filters run in order, each deleting every character it matches:

    1. U+093E-U+094C, U+0962 and U+0963 (dependent vowel signs).
    2. U+0901-U+0903 and U+093C (candrabindu, anusvara, visarga, nukta).
    3. Anything outside the Devanagari block U+0900-U+097F, which also
       drops whitespace, Latin letters and ASCII digits.

    The virama (U+094D) falls outside the deleted ranges and is kept.

    Args:
        text: The string to simplify.

    Returns:
        The input string with all matching characters removed.
    """
    removal_patterns = (
        r'[\u093E-\u094C\u0962\u0963]',
        r'[\u0901-\u0903\u093C]',
        r'[^\u0900-\u097F]',
    )
    skeleton = text
    for pattern in removal_patterns:
        skeleton = re.sub(pattern, '', skeleton)
    return skeleton
# ---------------------------
# File paths
# ---------------------------
# Term list handed to SymSpell, and the original/simplified pairs file.
simplified_only_path = "./data/simplified_only_names.txt"
simplified_dict_path = "./data/simplified_dict.txt"
# ---------------------------
# Load simplified dictionary
# ---------------------------
# Maps a simplified name back to its original spelling. A line is used only
# when, after trimming whitespace and commas and dropping double quotes, it
# splits on ":" into exactly two fields (original first, simplified second).
# If several originals share one simplified form, the last line read wins.
simplified_map = {}
with open(simplified_dict_path, "r", encoding="utf-8") as dict_file:
    for raw_line in dict_file:
        fields = raw_line.strip().strip(",").replace('"', '').split(":")
        if len(fields) != 2:
            # Covers lines without any ":" as well as lines with several.
            continue
        original_name, simplified_name = (field.strip() for field in fields)
        simplified_map[simplified_name] = original_name
# ---------------------------
# Main Gradio function
# ---------------------------
def lookup_location(input_term, max_edit_distance, prefix_length):
    """Fuzzy-match a Nepali location name and report its original spelling.

    The input is reduced with ``simplify_devanagari``, looked up in a
    SymSpell index built from ``simplified_only_path``, and the closest
    match is translated back through ``simplified_map``.

    Args:
        input_term: The location name typed by the user.
        max_edit_distance: Maximum edit distance used both to build the
            SymSpell index and to run the lookup.
        prefix_length: Prefix length used to build the SymSpell index.

    Returns:
        A human-readable message: the match report, a "no suggestions"
        notice, or an error description when the parameters are rejected
        or the dictionary file cannot be loaded.
    """
    # NOTE(review): the UI sliders may deliver floats -- confirm; SymSpell
    # is given plain integers either way.
    max_edit_distance = int(max_edit_distance)
    prefix_length = int(prefix_length)

    simplified_input = simplify_devanagari(input_term or "")
    if not simplified_input:
        # Nothing survived simplification (blank or non-Devanagari input);
        # an empty key would only match arbitrary short dictionary terms.
        return f"No suggestions found for: {input_term}"

    # The index is rebuilt on every call because both of its construction
    # parameters are adjustable per request.
    try:
        sym_spell = SymSpell(
            max_dictionary_edit_distance=max_edit_distance,
            prefix_length=prefix_length,
        )
    except ValueError as exc:
        # SymSpell validates its arguments (presumably including the
        # relation between prefix length and edit distance -- see the
        # symspellpy docs); report the problem instead of crashing the UI.
        return f"Invalid parameters: {exc}"

    if not sym_spell.load_dictionary(
        simplified_only_path, term_index=0, count_index=1, separator="$"
    ):
        return "Failed to load dictionary."

    suggestions = sym_spell.lookup(
        simplified_input,
        verbosity=Verbosity.ALL,
        max_edit_distance=max_edit_distance,
        include_unknown=False,
    )
    if not suggestions:
        return f"No suggestions found for: {input_term}"

    # Only the first suggestion is reported, exactly as before; the old
    # loop returned on its first iteration.
    found_simplified = suggestions[0].term
    original = simplified_map.get(found_simplified, "Not found in dictionary.")
    return f"Input: {input_term}\nSimplified: {found_simplified}\nOriginal: {original}"
# ---------------------------
# Gradio UI
# ---------------------------
# Input widgets, listed in the positional order of lookup_location's
# parameters: search term, max edit distance, prefix length.
location_box = gr.Textbox(label="Enter Nepali location name")
edit_distance_slider = gr.Slider(1, 5, value=3, step=1, label="Max Edit Distance")
prefix_length_slider = gr.Slider(1, 10, value=5, step=1, label="Prefix Length")
result_box = gr.Textbox(label="Result")

# Example rows: [location name, max edit distance, prefix length].
example_rows = [
    ["काठमाडौँ", 3, 5],
    ["सुकेधारा", 3, 5],
    ["गोंगबु", 3, 5],
    ["माइतीघर", 2, 5],
    ["कलंकी", 2, 5],
]

iface = gr.Interface(
    fn=lookup_location,
    inputs=[location_box, edit_distance_slider, prefix_length_slider],
    outputs=result_box,
    title="Nepali Fuzzy Location Lookup",
    description="Uses regex simplification, SymSpell fuzzy match, and maps back to original name. Adjust max edit distance and prefix length.",
    examples=example_rows,
)

# Starts the app as soon as this module is executed.
iface.launch(share=True)