File size: 2,792 Bytes
45cc894
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import functools
import re

import gradio as gr
from symspellpy import SymSpell, Verbosity

# ---------------------------
# Simplification function
# ---------------------------
def simplify_devanagari(text):
    """Reduce Devanagari text to a bare skeleton used as a fuzzy-match key.

    Three stripping passes are applied in order:

    1. dependent vowel signs (U+093E-U+094C, U+0962, U+0963),
    2. candrabindu / anusvara / visarga (U+0901-U+0903) and nukta (U+093C),
    3. every character outside the Devanagari block (U+0900-U+097F),
       which also drops spaces, digits and Latin letters.

    Characters not listed above (e.g. the virama U+094D and independent
    vowels) are kept.

    Args:
        text: The string to simplify.

    Returns:
        The simplified string; empty when nothing survives the passes.
    """
    stripping_passes = (
        r'[\u093E-\u094C\u0962\u0963]',
        r'[\u0901-\u0903\u093C]',
        r'[^\u0900-\u097F]',
    )
    skeleton = text
    for pattern in stripping_passes:
        skeleton = re.sub(pattern, '', skeleton)
    return skeleton

# ---------------------------
# File paths
# ---------------------------
simplified_only_path = "./data/simplified_only_names.txt"
simplified_dict_path = "./data/simplified_dict.txt"

# ---------------------------
# Load simplified dictionary
# ---------------------------
# Reverse lookup table: simplified spelling -> original name.
# Each usable line holds exactly one ":" separating the original name (left)
# from its simplified form (right); surrounding whitespace, a trailing comma
# and any double quotes are discarded. Presumably the file is a dict-literal
# style listing such as `"original": "simplified",` -- confirm against data.
# If two originals share a simplified form, the later line wins.
simplified_map = {}
with open(simplified_dict_path, "r", encoding="utf-8") as dict_file:
    for raw_line in dict_file:
        entry = raw_line.strip().strip(",").replace('"', '')
        # Lines with zero colons or more than one colon are not entries.
        if entry.count(":") != 1:
            continue
        original_name, simplified_name = entry.split(":")
        simplified_map[simplified_name.strip()] = original_name.strip()

# ---------------------------
# Main Gradio function
# ---------------------------
@functools.lru_cache(maxsize=4)
def _load_sym_spell(max_edit_distance, prefix_length):
    """Build the SymSpell index for one (edit distance, prefix length) pair.

    The result is memoised so repeated requests with the same slider
    settings do not re-read and re-index the dictionary file.

    Args:
        max_edit_distance: Maximum edit distance the index must support.
        prefix_length: Prefix length used when generating deletes.

    Returns:
        A ``SymSpell`` instance with the simplified-names dictionary loaded.

    Raises:
        OSError: If ``load_dictionary`` reports failure. An exception is
            used (instead of a sentinel return) so the failure is not
            cached and a later call retries the load.
    """
    sym_spell = SymSpell(
        max_dictionary_edit_distance=max_edit_distance,
        prefix_length=prefix_length,
    )
    loaded = sym_spell.load_dictionary(
        simplified_only_path, term_index=0, count_index=1, separator="$"
    )
    if not loaded:
        raise OSError("Failed to load dictionary.")
    return sym_spell


def lookup_location(input_term, max_edit_distance, prefix_length):
    """Fuzzy-match a Nepali location name and map it back to its original.

    The input is simplified with ``simplify_devanagari``, looked up in the
    SymSpell index, and the first suggestion is translated back through
    ``simplified_map``.

    Args:
        input_term: Text typed by the user.
        max_edit_distance: Maximum edit distance for the fuzzy lookup.
        prefix_length: SymSpell prefix length; must exceed
            ``max_edit_distance``.

    Returns:
        A human-readable result string: the match report, a "no suggestions"
        message, a parameter-validation message, or a dictionary-load
        failure message.
    """
    # UI sliders may deliver numeric values as floats; SymSpell expects ints,
    # and normalising also keeps the cache key stable.
    max_edit_distance = int(max_edit_distance)
    prefix_length = int(prefix_length)

    # SymSpell rejects this combination with a ValueError, which would
    # otherwise surface to the user as an opaque error.
    if prefix_length <= max_edit_distance:
        return (
            f"Prefix length ({prefix_length}) must be greater than "
            f"max edit distance ({max_edit_distance})."
        )

    simplified_input = simplify_devanagari(input_term or "")
    if not simplified_input:
        # Nothing Devanagari survived simplification, so there is no key to
        # match against; skip the lookup rather than report a spurious hit.
        return f"No suggestions found for: {input_term}"

    try:
        sym_spell = _load_sym_spell(max_edit_distance, prefix_length)
    except OSError:
        return "Failed to load dictionary."

    suggestions = sym_spell.lookup(
        simplified_input,
        verbosity=Verbosity.ALL,
        max_edit_distance=max_edit_distance,
        include_unknown=False,
    )
    if not suggestions:
        return f"No suggestions found for: {input_term}"

    # Only the first suggestion is reported.
    found_simplified = suggestions[0].term
    original = simplified_map.get(found_simplified, "Not found in dictionary.")
    return f"Input: {input_term}\nSimplified: {found_simplified}\nOriginal: {original}"

# ---------------------------
# Gradio UI
# ---------------------------
# Input widgets, in the positional order expected by lookup_location:
# (input_term, max_edit_distance, prefix_length).
_input_controls = [
    gr.Textbox(label="Enter Nepali location name"),
    gr.Slider(1, 5, value=3, step=1, label="Max Edit Distance"),
    gr.Slider(1, 10, value=5, step=1, label="Prefix Length"),
]

# Each example row supplies one value per input widget, in the same order.
_example_rows = [
    ["काठमाडौँ", 3, 5],
    ["सुकेधारा", 3, 5],
    ["गोंगबु", 3, 5],
    ["माइतीघर", 2, 5],
    ["कलंकी", 2, 5],
]

iface = gr.Interface(
    fn=lookup_location,
    inputs=_input_controls,
    outputs=gr.Textbox(label="Result"),
    title="Nepali Fuzzy Location Lookup",
    description="Uses regex simplification, SymSpell fuzzy match, and maps back to original name. Adjust max edit distance and prefix length.",
    examples=_example_rows,
)

# Starts the web UI when this module is executed; share=True asks Gradio for
# a public share link.
iface.launch(share=True)