Spaces:
Sleeping
Sleeping
| import re | |
| import gradio as gr | |
| from symspellpy import SymSpell, Verbosity | |
| # --------------------------- | |
| # Simplification function | |
| # --------------------------- | |
| def simplify_devanagari(text): | |
| cleaned = re.sub(r'[\u093E-\u094C\u0962\u0963]', '', text) | |
| cleaned = re.sub(r'[\u0901-\u0903\u093C]', '', cleaned) | |
| cleaned = re.sub(r'[^\u0900-\u097F]', '', cleaned) | |
| return cleaned | |
| # --------------------------- | |
| # File paths | |
| # --------------------------- | |
| simplified_only_path = "./data/simplified_only_names.txt" | |
| simplified_dict_path = "./data/simplified_dict.txt" | |
| # --------------------------- | |
| # Load simplified dictionary | |
| # --------------------------- | |
| simplified_map = {} | |
| with open(simplified_dict_path, "r", encoding="utf-8") as f: | |
| for line in f: | |
| if ":" not in line: | |
| continue | |
| parts = line.strip().strip(",").replace('"', '').split(":") | |
| if len(parts) == 2: | |
| orig, simp = parts[0].strip(), parts[1].strip() | |
| simplified_map[simp] = orig | |
| # --------------------------- | |
| # Main Gradio function | |
| # --------------------------- | |
| def lookup_location(input_term, max_edit_distance, prefix_length): | |
| sym_spell = SymSpell(max_dictionary_edit_distance=max_edit_distance, prefix_length=prefix_length) | |
| if not sym_spell.load_dictionary(simplified_only_path, term_index=0, count_index=1, separator="$"): | |
| return "Failed to load dictionary." | |
| simplified_input = simplify_devanagari(input_term) | |
| suggestions = sym_spell.lookup( | |
| simplified_input, | |
| verbosity=Verbosity.ALL, | |
| max_edit_distance=max_edit_distance, | |
| include_unknown=False | |
| ) | |
| if suggestions: | |
| for s in suggestions: | |
| found_simplified = s.term | |
| original = simplified_map.get(found_simplified, "Not found in dictionary.") | |
| return f"Input: {input_term}\nSimplified: {found_simplified}\nOriginal: {original}" | |
| else: | |
| return f"No suggestions found for: {input_term}" | |
| # --------------------------- | |
| # Gradio UI | |
| # --------------------------- | |
| iface = gr.Interface( | |
| fn=lookup_location, | |
| inputs=[ | |
| gr.Textbox(label="Enter Nepali location name"), | |
| gr.Slider(1, 5, value=3, step=1, label="Max Edit Distance"), | |
| gr.Slider(1, 10, value=5, step=1, label="Prefix Length") | |
| ], | |
| outputs=gr.Textbox(label="Result"), | |
| title="Nepali Fuzzy Location Lookup", | |
| description="Uses regex simplification, SymSpell fuzzy match, and maps back to original name. Adjust max edit distance and prefix length.", | |
| examples=[ | |
| ["काठमाडौँ", 3, 5], | |
| ["सुकेधारा", 3, 5], | |
| ["गोंगबु", 3, 5], | |
| ["माइतीघर", 2, 5], | |
| ["कलंकी", 2, 5], | |
| ] | |
| ) | |
| iface.launch(share=True) | |