Spaces:

anisgtboi
/

my-dialect-translator-app

Sleeping

App Files Files Community

anisgtboi committed on 11 days ago

Commit

5b2f305

verified ·

1 Parent(s): 612a620

Update app.py

Browse files

Files changed (1) hide show

app.py +176 -114

app.py CHANGED Viewed

@@ -2,6 +2,7 @@
 """
 Enhanced Dialect Bengali Translator with Semantic Search
 Uses both text similarity and semantic pattern matching
 """
 import difflib
@@ -10,15 +11,16 @@ import gradio as gr
 from collections import defaultdict
 import re
-# === Phrase data: [Dialect Bengali, Dialect Bengali Script, Actual Bengali, Benglish] ===
 phrases_data = [
-    ["gesle ni", "গেসলে নি", "গিয়েছিলে কি?", "giese chile ki?"],
-    ["oislo ni", "ওইস্লো নি", "হয়েছে কি?", "hoyeche ki?"],
-    ["oigese ni", "ওইগেসে নি", "হয়েগেছে কি?", "hoyegese ki?"],
-    ["oise", "ওইসে", "হয়েছে", "hoyeche"],
-    ["bala oise", "বালা ওইসে", "ভালো হয়েছে", "bhalo hoyeche"],
-    ["kub bala oise", "কুব বালা ওইসে", "অনেক ভালো হয়েছে", "onek bhalo hoyeche"],
-    ["oise jen", "ওইসে জেন", "হয়েছিল যে", "hoyechilo je"],
     ["jaite ni", "জাইতে নি", "যাবে কি?", "jabe ki?"],
     ["or ni", "ওর নি", "হচ্ছে কি?", "hocche ki?"],
     ["or", "ওর", "হচ্ছে", "hocche"],
@@ -26,13 +28,7 @@ phrases_data = [
     ["bala ni", "বালা নি", "ভালো কি?", "bhalo ki?"],
     ["or je", "ওর যে", "হচ্ছে যে", "hocche je"],
     ["jaibe ni", "জাইবে নি", "যাবে কি?", "jabe ki?"],
-    ["jare ni", "जारे नि", "যাচ্ছো কি?", "jaccho ki?"],
-    ["ami jaimu", "আমি জাইমু", "আমি যাব", "ami jabo"],
-    ["jaimu", "জাইমু", "যাব", "jabo"],
-    ["jaibo", "জাইবো", "যাবে", "jabe"],
-    ["oibo", "ওইবো", "হবে", "hobe"],
-    ["oibo jen", "ওইবো জেন", "হবে যে", "hobe je"],
-    ["Goto kali", "গোতো কালি", "গত কাল", "goto kal"],
     ["Kita kobor?", "কিতা খবর?", "কি খবর?", "ki khobor?"],
     ["Kita korde?", "কিতা কোর্দে?", "কি করছে?", "ki korchho?"],
     ["acha oibo-tik ase", "আচা ওইবো-তিক আসে", "ঠিক আছে", "thik ache"],
@@ -42,43 +38,112 @@ phrases_data = [
     ["se hole", "সে হলে", "তাহলে", "tahole"],
     ["Sob bala asoin ni", "সব বালা আসইন নি", "সব ভালো আছে কি?", "sob bhalo ache ki?"],
     ["Sob bala ase", "সব বালা আসে", "সব ভালো আছে", "sob bhalo ache"],
-    ["Sob bala", "সব বালা", "সব ভালো", "sob bhalo"],
     ["asoini", "আসইনি", "আছে কি?", "ache ki?"],
-    ["ase ni", "আছে নি", "আছে কি?", "ache ki?"],
     ["ase", "আসে", "আছে", "ache"],
-    ["Sob", "সব", "সব", "sob"]
 ]
-# Semantic mapping of dialect patterns to meanings
 semantic_patterns = {
-    "ni": {"meaning": "কি", "type": "question"},
-    "or": {"meaning": "হচ্ছে", "type": "verb"},
-    "oise": {"meaning": "হয়েছে", "type": "verb"},
-    "oibo": {"meaning": "হবে", "type": "verb"},
-    "jaimu": {"meaning": "যাব", "type": "verb"},
-    "jaibo": {"meaning": "যাবে", "type": "verb"},
-    "kobor": {"meaning": "খবর", "type": "noun"},
-    "korde": {"meaning": "করছে", "type": "verb"},
-    "acha": {"meaning": "ঠিক", "type": "adjective"},
-    "bala": {"meaning": "ভালো", "type": "adjective"},
-    "kub": {"meaning": "অনেক", "type": "adverb"},
-    "gesle": {"meaning": "গিয়েছিলে", "type": "verb"},
-    "oislo": {"meaning": "হয়েছে", "type": "verb"},
-    "oigese": {"meaning": "হয়েগেছে", "type": "verb"},
-    "jen": {"meaning": "যে", "type": "conjunction"},
-    "je": {"meaning": "যে", "type": "conjunction"},
-    "tik": {"meaning": "ঠিক", "type": "adjective"},
-    "ase": {"meaning": "আছে", "type": "verb"},
-    "asoin": {"meaning": "আছে", "type": "verb"},
-    "asoini": {"meaning": "আছে কি", "type": "verb+question"},
-    "Goto": {"meaning": "গত", "type": "adjective"},
-    "kali": {"meaning": "কাল", "type": "noun"},
-    "Kita": {"meaning": "কি", "type": "question"},
-    "tew": {"meaning": "তাহলে", "type": "conjunction"},
-    "tente": {"meaning": "তাহলে", "type": "conjunction"},
-    "to": {"meaning": "তাহলে", "type": "conjunction"},
-    "se hole": {"meaning": "তাহলে", "type": "conjunction"},
-    "Sob": {"meaning": "সব", "type": "adjective"}
 }
 # Precompute data structures for matching
@@ -86,7 +151,7 @@ dialects = [p[0] for p in phrases_data]
 dialects_lower = [d.lower() for d in dialects]
 actual_bengali_list = [p[2] for p in phrases_data]
-# Create a mapping from dialect to all other forms
 dialect_to_all = {p[0].lower(): p for p in phrases_data}
 def semantic_analysis(user_input):
@@ -94,39 +159,45 @@ def semantic_analysis(user_input):
     user_lower = user_input.lower()
     detected_patterns = []
     meaning_components = []
-    # Check for semantic patterns
     for pattern, info in semantic_patterns.items():
-        if pattern in user_lower:
-            detected_patterns.append((pattern, info["meaning"], info["type"]))
-            meaning_components.append(info["meaning"])
     return detected_patterns, meaning_components
-def find_semantic_matches(user_input, threshold=0.4):
-    """Find matches based on semantic similarity"""
     user_lower = user_input.lower()
     matches = []
     # Get semantic patterns from user input
     detected_patterns, meaning_components = semantic_analysis(user_input)
     # If we found semantic patterns, look for phrases with similar meanings
     if meaning_components:
         for i, (dialect, dialect_bengali, actual, benglish) in enumerate(phrases_data):
-            # Check if the actual Bengali contains any of the meaning components
-            match_score = 0
             for meaning in meaning_components:
                 if meaning in actual:
-                    match_score += 0.3
-            # Also consider text similarity
             text_similarity = difflib.SequenceMatcher(None, user_lower, dialect.lower()).ratio()
-            total_score = match_score + (text_similarity * 0.7)
             if total_score > threshold:
                 matches.append((i, total_score, "semantic"))
     return matches
 def format_suggestions_from_indices(indices, match_type="text", scores=None):
@@ -134,16 +205,16 @@ def format_suggestions_from_indices(indices, match_type="text", scores=None):
     lines = []
     for i, idx in enumerate(indices):
         d, dialect_bengali, actual, benglish = phrases_data[idx]
         score_str = ""
         if scores is not None and i < len(scores):
             s_pct = int(scores[i] * 100)
             score_str = f" ({match_type}-match: {s_pct}%)"
         lines.append(f"• {d}{score_str}\n    Dialect Bengali: {dialect_bengali}\n    Actual Bengali: {actual}\n    Benglish: {benglish}")
     return "\n\n".join(lines)
-def translate_text(user_text, top_k: int = 5):
     """
     Returns: (dialect_out, actual_out, benglish_out, suggestions_out)
     """
@@ -159,71 +230,62 @@ def translate_text(user_text, top_k: int = 5):
             if q_lower == dialect.lower():
                 return dialect_bengali, actual, benglish, "✅ EXACT MATCH (100%)"
-        # 2) Check if input contains multiple phrases
-        potential_phrases = re.split(r'[.,;!?]\s*', q)
         if len(potential_phrases) > 1:
             results = []
             for phrase in potential_phrases:
-                if phrase.strip():
-                    # Try to match each phrase individually
-                    for d, dialect_bengali, actual, benglish in phrases_data:
-                        if phrase.lower().strip() == d.lower():
-                            results.append(f"{dialect_bengali} → {actual} → {benglish}")
-                            break
-                    else:
-                        results.append(f"'{phrase}' → No match found")
-            if results:
-                return "", "", "", "Multiple phrases detected:\n\n" + "\n\n".join(results)
         # 3) Semantic matches
         semantic_matches = find_semantic_matches(q)
         if semantic_matches:
             semantic_matches.sort(key=lambda x: x[1], reverse=True)
-            best_idx = semantic_matches[0][0]
-            d, dialect_bengali, actual, benglish = phrases_data[best_idx]
-            # Format suggestions
-            indices = [idx for idx, score, match_type in semantic_matches[:top_k]]
-            scores = [score for idx, score, match_type in semantic_matches[:top_k]]
             suggestions = "🔍 Semantic matches found:\n\n" + format_suggestions_from_indices(indices, "semantic", scores)
             return dialect_bengali, actual, benglish, suggestions
-        # 4) Partial matches in dialect
         partial_matches = []
         for i, (dialect, dialect_bengali, actual, benglish) in enumerate(phrases_data):
             if q_lower in dialect.lower() or dialect.lower() in q_lower:
                 similarity = difflib.SequenceMatcher(None, q_lower, dialect.lower()).ratio()
                 partial_matches.append((i, similarity))
         if partial_matches:
             partial_matches.sort(key=lambda x: x[1], reverse=True)
-            best_idx = partial_matches[0][0]
-            d, dialect_bengali, actual, benglish = phrases_data[best_idx]
-            # Format suggestions
             indices = [idx for idx, score in partial_matches[:top_k]]
             scores = [score for idx, score in partial_matches[:top_k]]
             suggestions = "🔍 Partial matches in dialect:\n\n" + format_suggestions_from_indices(indices, "text", scores)
             return dialect_bengali, actual, benglish, suggestions
-        # 5) difflib close textual matches in dialect
         close_matches = difflib.get_close_matches(q_lower, dialects_lower, n=top_k, cutoff=0.3)
         if close_matches:
-            best_text = close_matches[0]
-            idx = dialects_lower.index(best_text)
-            d, dialect_bengali, actual, benglish = phrases_data[idx]
-            text_sim_scores = []
-            for m in close_matches:
-                score = difflib.SequenceMatcher(None, q_lower, m).ratio()
-                text_sim_scores.append(score)
             indices = [dialects_lower.index(m) for m in close_matches]
             suggestions = "🔍 Similar dialect phrases:\n\n" + format_suggestions_from_indices(indices, "text", text_sim_scores)
             return dialect_bengali, actual, benglish, suggestions
-        # 6) Nothing found
-        sample_phrases = [p[0] for p in phrases_data[:8]]
         return "", "", "", "❓ NO MATCH FOUND\n\nTry these sample phrases:\n" + "\n".join([f"• {ph}" for ph in sample_phrases])
     except Exception as ex:
@@ -236,7 +298,7 @@ def show_semantic_analysis(user_text):
         return ""
     patterns, meanings = semantic_analysis(user_text)
     if patterns:
-        return f"Detected patterns: {', '.join([f'{p}→{m}' for p, m, t in patterns])}"
     return "No specific patterns detected"
 # Custom CSS for a softer, less blinding color scheme
@@ -261,10 +323,10 @@ body {
 with gr.Blocks(title="Enhanced Dialect Translator", css=css, theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 🌍 Dialect Bengali → Actual Bengali → Benglish")
     gr.Markdown("Type a phrase in your dialect. The app uses both text and semantic matching to find similar phrases.")
     # Define input component first
     inp = gr.Textbox(label="Type phrase in Dialect Bengali", placeholder="e.g. Kita kobor? Sob bala asoin ni")
     with gr.Row():
         with gr.Column(scale=1):
             gr.Markdown("### Examples to try:")
@@ -275,24 +337,24 @@ with gr.Blocks(title="Enhanced Dialect Translator", css=css, theme=gr.themes.Sof
             )
         with gr.Column(scale=2):
             btn = gr.Button("Translate / Find", variant="primary")
     with gr.Row():
         out_dialect = gr.Textbox(label="Dialect Bengali (Bengali Script)")
         out_actual = gr.Textbox(label="Actual Bengali (Standard)")
         out_benglish = gr.Textbox(label="Benglish (Phonetic English)")
     with gr.Row():
         semantic_info = gr.Textbox(label="Semantic Analysis", lines=2)
     suggestions = gr.Textbox(label="Status / Suggestions / Top Candidates", lines=8)
     # Set up event handlers
     btn.click(
-        fn=translate_text,
-        inputs=[inp],
         outputs=[out_dialect, out_actual, out_benglish, suggestions]
     )
     inp.change(
         fn=show_semantic_analysis,
         inputs=[inp],

 """
 Enhanced Dialect Bengali Translator with Semantic Search
 Uses both text similarity and semantic pattern matching
+Updated to include new dialect patterns and polite/negative 'des/dis' behavior
 """
 import difflib
 from collections import defaultdict
 import re
# === Phrase data: [Dialect Latin, Dialect Bengali Script, Actual Bengali (Std), Benglish] ===
# Every row has exactly four columns, in the order given above.
# Keep the first column unique (case-insensitively): the matchers walk every
# row, so a repeated row shows up as a repeated suggestion.
phrases_data = [
    # Questions / common
    ["gesle ni", "গেসলে নি", "গিয়েছিলে কি?", "giese chile ki?"],
    ["oislo ni", "ওইস্লো নি", "হয়েছে কি?", "hoyeche ki?"],
    ["oigese ni", "ওইগেসে নি", "হয়ে গেছে কি?", "hoyegese ki?"],
    ["oise", "ওইসে", "হয়েছে", "hoyeche"],
    ["bala oise", "বালা ওইসে", "ভালো হয়েছে", "bhalo hoyeche"],
    ["kub bala oise", "কুব বালা ওইসে", "অনেক ভালো হয়েছে", "onek bhalo hoyeche"],
    ["oise jen", "ওইসে জেন", "হয়েছিল যে", "hoyechilo je"],
    ["jaite ni", "জাইতে নি", "যাবে কি?", "jabe ki?"],
    ["or ni", "ওর নি", "হচ্ছে কি?", "hocche ki?"],
    ["or", "ওর", "হচ্ছে", "hocche"],
    ["bala ni", "বালা নি", "ভালো কি?", "bhalo ki?"],
    ["or je", "ওর যে", "হচ্ছে যে", "hocche je"],
    ["jaibe ni", "জাইবে নি", "যাবে কি?", "jabe ki?"],
    ["jare ni", "জারে নি", "যাচ্ছো কি?", "jaccho ki?"],
    ["Kita kobor?", "কিতা খবর?", "কি খবর?", "ki khobor?"],
    ["Kita korde?", "কিতা কোর্দে?", "কি করছে?", "ki korchho?"],
    ["acha oibo-tik ase", "আচা ওইবো-তিক আসে", "ঠিক আছে", "thik ache"],
    ["se hole", "সে হলে", "তাহলে", "tahole"],
    ["Sob bala asoin ni", "সব বালা আসইন নি", "সব ভালো আছে কি?", "sob bhalo ache ki?"],
    ["Sob bala ase", "সব বালা আসে", "সব ভালো আছে", "sob bhalo ache"],
    ["asoini", "আসইনি", "আছে কি?", "ache ki?"],
    ["ase", "আসে", "আছে", "ache"],
    # Future / Present / Past core verbs (ja / de / fawa / ka)
    ["jaimu", "জাইমু", "যাব", "jabo"],
    # NOTE(review): "jaibay" and "jaibe" share the same script form — confirm.
    ["jaibay", "জাইবে", "তুমি যাবে", "tumi jabe (dialect)"],
    ["jaibe", "জাইবে", "তুমি যাবে (friend)", "tumi jabe (friend form)"],
    ["jaibo", "জাইবো", "যাবে", "jabe"],
    ["jaiba", "জাইবা", "তারা যাবে", "tara jabe"],
    ["oibo", "ওইবো", "হবে", "hobe"],
    ["oibo jen", "ওইবো জেন", "হবে যে", "hobe je"],
    ["ami jaimu", "আমি জাইমু", "আমি যাব", "ami jabo"],
    ["Ami bazaro jaimu", "আমি বাজারো জাইমু", "আমি বাজারে যাব", "ami bazar e jabo"],
    ["He rit aise", "হে রিত আসে", "সে রাতে এসেছে", "se rate esheche"],
    # Give (de) family
    ["des", "দেস", "দাও (মৃদু)", "des (give, friendly)"],
    ["des na", "দেস না", "দাও (দয়া করে, মৃদু অনুরোধ)", "des na (please give)"],
    ["dis", "দিস", "না দাও / নিষেধ", "dis (don't give)"],
    ["dis na", "দিস না", "দেও না", "dis na (don't give)"],
    ["dilaisi", "দিলাইসি", "দিয়েছি", "diyechi (I gave)"],
    ["dilaise", "দিলাইসে", "দিয়েছে", "diyeche (he gave)"],
    ["dilaisoin", "দিলাইসইন", "দিয়েছেন (সম্মানভাষা)", "diyechen (honorific)"],
    ["dise na", "দিসে না", "দেয়নি", "deni (didn't give)"],
    ["dibo", "দিবো", "দেব", "debo (will give)"],
    # Script column spells "amare" to agree with the Latin column.
    ["der amare", "দের আমারে", "সে আমাকে দেয়", "se amake dey"],
    ["dibo amare", "দিবো আমারে", "সে আমাকে দেবে", "se amake debe"],
    # Get / receive (fawa) family
    ["faisi", "ফাইসি", "পেয়েছি", "peyechi (I got)"],
    ["faisi na", "ফাইসি না", "পাইনি", "pelam na (didn't get)"],
    ["faisot ni", "ফাইসোট নি", "পেলে কি?", "pele ki?"],
    ["faislo", "ফাইসলো", "পেয়ে গেল/লাভ করল (3sg past)", "pelo (he got)"],
    ["faislam", "ফাইসলাম", "পেয়েছিলাম", "pelam (I got past)"],
    ["faisla", "ফাইসলা", "পেয়েছিল (they)", "pela (they got)"],
    ["faisly", "ফাইসলাই", "তুমি পেয়েছ", "tumi pele (you got)"],
    ["faimu", "ফাইমু", "পাব", "pabo (I will get)"],
    # NOTE(review): "faibay" and "faibe" share the same script form — confirm.
    ["faibay", "ফাইবে", "তুমি পাবে (dialect)", "tumi pabe"],
    ["faibe", "ফাইবে", "তুমি পাবে (friend)", "tumi pabe (friend)"],
    ["faibo", "ফাইবো", "সে পাবে", "se pabe"],
    ["faiba", "ফাইবা", "তারা পাবে", "tara pabe"],
    # Eat (ka) family
    # Benglish column carries the standard form, parallel to jabo / pabo.
    ["kaimu", "কাইমু", "খাব", "khabo (I will eat)"],
    # Second-person future is "খাবে", matching the Benglish "tumi khabe".
    # NOTE(review): "kaibay" and "kaibe" share the same script form — confirm.
    ["kaibay", "কাইবে", "তুমি খাবে (dialect)", "tumi khabe"],
    ["kaibe", "কাইবে", "তুমি খাবে (friend)", "tumi khabe (friend)"],
    ["kaibo", "কাইবো", "সে খাবে", "se khabe"],
    ["kaiba", "কাইবা", "তারা খাবে", "tara khabe"],
    # Other sample sentences from user's corpus
    ["Ami faisi ekta notun jinish", "আমি ফাইসি একটা নতুন জিনিস", "আমি একটা নতুন জিনিস পেয়েছি", "ami ekta notun jinish peyechi"],
    ["Tumi taka faiso ni", "তুমি টাকা ফাইসো নি", "তুমি টাকা পেয়েছ কি?", "tumi taka peyecho ki?"],
    ["He sobsomoy amare teka dey", "হে সবসময় আমারে তেকা দেয়", "সে সবসময় আমাকে টাকা দেয়", "se shobshomoy amake taka dey"],
    ["Tara bazaro bohut jinish faisoin", "তারা বাজারো বহুত জিনিস ফাইসইন", "তারা বাজারে অনেক জিনিস পেয়েছে", "tara bazar e onek jinish peyechhe"],
    ["Tumi boi diso ni", "তুমি বই দিসো নি", "আপনি কি বই দিয়েছেন?", "apni boi diyechen?"],
    ["Tuin boi disot ni", "তুইন বই দিসট নি", "তুই বই দিয়েছ কি?", "tui boi diyechish?"],
    ["Bifodo asi", "বিফোডো আছি", "বিপদে আছি", "bipode achi"],
    # NOTE(review): the Benglish gloss on the next row looks garbled — confirm wording.
    ["Kotobil bade fawa gese", "কোটবিল বাদে ফাওয়া গেসে", "অনেকদিন পরে পেয়েছি", "got after long time got"],
]
# Semantic mapping of dialect patterns to meanings + types.
# Each key is a regular expression (word-boundary anchored); each value holds
# the standard-Bengali "meaning" and a coarse grammatical "type".
# Write patterns in lower case so they match however the input is
# case-normalised before searching.
semantic_patterns = {
    # question/particles
    r"\bni\b": {"meaning": "কি", "type": "question"},
    # NOTE(review): anything this matches is also matched by the pattern above,
    # so a sentence-final "ni" is reported twice — confirm that is intended.
    r"\bni\b$": {"meaning": "কি", "type": "question"},
    # verbs / roots
    r"\bor\b": {"meaning": "হচ্ছে", "type": "verb"},
    r"\boise\b": {"meaning": "হয়েছে", "type": "verb"},
    r"\boibo\b": {"meaning": "হবে", "type": "verb"},
    r"\bjaimu\b": {"meaning": "যাব", "type": "verb"},
    r"\bjaib[aey]\b": {"meaning": "যাবে", "type": "verb"},
    r"\bkobor\b": {"meaning": "খবর", "type": "noun"},
    r"\bkorde\b": {"meaning": "করছে", "type": "verb"},
    r"\bacha\b": {"meaning": "ঠিক", "type": "adjective"},
    r"\bbala\b": {"meaning": "ভালো", "type": "adjective"},
    r"\bkub\b": {"meaning": "অনেক", "type": "adverb"},
    r"\bgesle\b": {"meaning": "গিয়েছিলে", "type": "verb"},
    r"\boislo\b": {"meaning": "হয়েছে", "type": "verb"},
    r"\boigese\b": {"meaning": "হয়েগেছে", "type": "verb"},
    r"\bjen\b": {"meaning": "যে", "type": "conjunction"},
    r"\bje\b": {"meaning": "যে", "type": "conjunction"},
    r"\btik\b": {"meaning": "ঠিক", "type": "adjective"},
    r"\base\b": {"meaning": "আছে", "type": "verb"},
    r"\basoin\b": {"meaning": "আছে", "type": "verb"},
    r"\basoini\b": {"meaning": "আছে কি", "type": "verb+question"},
    # Lower-case on purpose: a capitalised pattern can never match
    # lower-cased input.
    r"\bgoto\b": {"meaning": "গত", "type": "adjective"},
    r"\bkali\b": {"meaning": "কাল", "type": "noun"},
    r"\bkita\b": {"meaning": "কি", "type": "question"},
    r"\btew\b": {"meaning": "তাহলে", "type": "conjunction"},
    # give/get polarity (important dialect contrast)
    r"\bdes\b": {"meaning": "দান/দাও (বন্ধু-মৃদু)", "type": "give_positive"},
    r"\bdes\s+na\b": {"meaning": "মৃদু অনুরোধ: দাও", "type": "give_positive"},
    r"\bdis\b": {"meaning": "না দাও / নিষেধ", "type": "give_negative"},
    r"\bdis\s+na\b": {"meaning": "না দাও (নিষেধ)", "type": "give_negative"},
    # fawa/get variants
    r"\bfaisi\b": {"meaning": "পেয়েছি", "type": "verb"},
    r"\bfaisl[ao]m\b": {"meaning": "পেয়েছিলাম/পেয়েছি(past)", "type": "verb"},
    r"\bfaimu\b": {"meaning": "পাব", "type": "verb"},
    r"\bfaib[ae]y?\b": {"meaning": "পাবে", "type": "verb"},
    # future pattern markers
    # NOTE(review): with \b on both sides these only match the marker as a
    # standalone word, never as a verb suffix (e.g. "jaimu") — confirm intent.
    r"\bmu\b": {"meaning": "ভবিষ্যৎ: 1sg", "type": "tense_future"},
    r"\bbay\b": {"meaning": "ভবিষ্যৎ: 2sg (tumi)", "type": "tense_future"},
    r"\bbo\b": {"meaning": "ভবিষ্যৎ: 3sg", "type": "tense_future"},
    r"\bba\b": {"meaning": "ভবিষ্যৎ: plural/3pl", "type": "tense_future"},
}
 # Precompute data structures for matching
 dialects_lower = [d.lower() for d in dialects]
 actual_bengali_list = [p[2] for p in phrases_data]
+# Create a mapping from dialect to full row
 dialect_to_all = {p[0].lower(): p for p in phrases_data}
 def semantic_analysis(user_input):
     """Detect known dialect patterns in *user_input*.

     Every key of ``semantic_patterns`` is searched as a regular expression.
     Matching is case-insensitive, so a pattern still fires if it happens to
     contain upper-case letters.

     Returns:
         (detected_patterns, meaning_components), where detected_patterns is a
         list of (pattern, meaning, type) tuples in table order and
         meaning_components is the parallel list of meanings. Both are empty
         for empty or None input.
     """
     detected_patterns = []
     meaning_components = []
     if not user_input:
         return detected_patterns, meaning_components
     for pattern, info in semantic_patterns.items():
         try:
             found = re.search(pattern, user_input, re.IGNORECASE)
         except re.error:
             # A malformed table entry must not break analysis of the rest.
             continue
         if found:
             detected_patterns.append((pattern, info["meaning"], info["type"]))
             meaning_components.append(info["meaning"])
     return detected_patterns, meaning_components
def find_semantic_matches(user_input, threshold=0.35):
    """Rank phrase rows by semantic overlap plus text similarity.

    The meanings detected in *user_input* by ``semantic_analysis`` are compared
    with each row of ``phrases_data``. A row earns 0.35 for every distinct
    meaning found in its standard-Bengali column, 0.25 for every distinct
    meaning found in its dialect column, and half of the difflib similarity
    ratio between the input and the dialect phrase.

    Args:
        user_input: Phrase typed by the user.
        threshold: A row is returned only if its score is strictly greater.

    Returns:
        A list of (row_index, score, "semantic") tuples in table order; empty
        when the input is empty or no pattern was detected. Scores are sums
        and may exceed 1.0.
    """
    if not user_input:
        return []
    user_lower = user_input.lower()
    _, meaning_components = semantic_analysis(user_input)
    # Several patterns can yield the same meaning; count each meaning once so
    # a single word is not rewarded twice. dict.fromkeys keeps first-seen order.
    unique_meanings = list(dict.fromkeys(meaning_components))
    if not unique_meanings:
        return []
    matches = []
    for i, (dialect, _script, actual, _benglish) in enumerate(phrases_data):
        dialect_lower = dialect.lower()
        match_score = 0.0
        for meaning in unique_meanings:
            if meaning in actual:
                match_score += 0.35
            # NOTE(review): meanings are Bengali-script strings while the
            # dialect column is Latin, so this boost looks unreachable with
            # the current tables — confirm before relying on it.
            if meaning in dialect_lower:
                match_score += 0.25
        text_similarity = difflib.SequenceMatcher(None, user_lower, dialect_lower).ratio()
        total_score = match_score + (text_similarity * 0.5)
        if total_score > threshold:
            matches.append((i, total_score, "semantic"))
    return matches
 def format_suggestions_from_indices(indices, match_type="text", scores=None):
     """Render candidate phrase rows as a bulleted, human-readable block.

     Args:
         indices: Row indices into ``phrases_data``, in display order.
         match_type: Label used in the score annotation, e.g. "text".
         scores: Optional scores parallel to *indices*; 1.0 means 100%. Rows
             without a corresponding score are shown without an annotation.

     Returns:
         One entry per index, separated by blank lines; "" when *indices* is
         empty.
     """
     lines = []
     for i, idx in enumerate(indices):
         d, dialect_bengali, actual, benglish = phrases_data[idx]
         score_str = ""
         if scores is not None and i < len(scores):
             # Scores built by summing boosts can exceed 1.0; keep the
             # displayed percentage inside 0-100.
             s_pct = max(0, min(100, int(scores[i] * 100)))
             score_str = f" ({match_type}-match: {s_pct}%)"
         lines.append(f"• {d}{score_str}\n    Dialect Bengali: {dialect_bengali}\n    Actual Bengali: {actual}\n    Benglish: {benglish}")
     return "\n\n".join(lines)
+def translate_text(user_text, top_k: int = 6):
     """
     Returns: (dialect_out, actual_out, benglish_out, suggestions_out)
     """
             if q_lower == dialect.lower():
                 return dialect_bengali, actual, benglish, "✅ EXACT MATCH (100%)"
+        # 2) If input contains multiple phrases separated by punctuation
+        potential_phrases = [p.strip() for p in re.split(r'[.,;!?]\s*', q) if p.strip()]
         if len(potential_phrases) > 1:
             results = []
             for phrase in potential_phrases:
+                matched = False
+                for d, dialect_bengali, actual, benglish in phrases_data:
+                    if phrase.lower() == d.lower():
+                        results.append(f"{dialect_bengali} → {actual} → {benglish}")
+                        matched = True
+                        break
+                if not matched:
+                    results.append(f"'{phrase}' → No match found")
+            return "", "", "", "Multiple phrases detected:\n\n" + "\n\n".join(results)
         # 3) Semantic matches
         semantic_matches = find_semantic_matches(q)
         if semantic_matches:
+            # sort and return top semantic candidates
             semantic_matches.sort(key=lambda x: x[1], reverse=True)
+            indices = [idx for idx, score, mt in semantic_matches[:top_k]]
+            scores = [score for idx, score, mt in semantic_matches[:top_k]]
             suggestions = "🔍 Semantic matches found:\n\n" + format_suggestions_from_indices(indices, "semantic", scores)
+            # Return best match as primary output
+            best_idx = indices[0]
+            d, dialect_bengali, actual, benglish = phrases_data[best_idx]
             return dialect_bengali, actual, benglish, suggestions
+        # 4) Partial matches in dialect strings
         partial_matches = []
         for i, (dialect, dialect_bengali, actual, benglish) in enumerate(phrases_data):
             if q_lower in dialect.lower() or dialect.lower() in q_lower:
                 similarity = difflib.SequenceMatcher(None, q_lower, dialect.lower()).ratio()
                 partial_matches.append((i, similarity))
         if partial_matches:
             partial_matches.sort(key=lambda x: x[1], reverse=True)
             indices = [idx for idx, score in partial_matches[:top_k]]
             scores = [score for idx, score in partial_matches[:top_k]]
+            best_idx = indices[0]
+            d, dialect_bengali, actual, benglish = phrases_data[best_idx]
             suggestions = "🔍 Partial matches in dialect:\n\n" + format_suggestions_from_indices(indices, "text", scores)
             return dialect_bengali, actual, benglish, suggestions
+        # 5) Close textual matches using difflib
         close_matches = difflib.get_close_matches(q_lower, dialects_lower, n=top_k, cutoff=0.3)
         if close_matches:
             indices = [dialects_lower.index(m) for m in close_matches]
+            text_sim_scores = [difflib.SequenceMatcher(None, q_lower, m).ratio() for m in close_matches]
+            best_idx = indices[0]
+            d, dialect_bengali, actual, benglish = phrases_data[best_idx]
             suggestions = "🔍 Similar dialect phrases:\n\n" + format_suggestions_from_indices(indices, "text", text_sim_scores)
             return dialect_bengali, actual, benglish, suggestions
+        # 6) Nothing found — give sample suggestions
+        sample_phrases = [p[0] for p in phrases_data[:10]]
         return "", "", "", "❓ NO MATCH FOUND\n\nTry these sample phrases:\n" + "\n".join([f"• {ph}" for ph in sample_phrases])
     except Exception as ex:
         return ""
     patterns, meanings = semantic_analysis(user_text)
     if patterns:
+        return f"Detected patterns: {', '.join([f'{p} → {m}' for p, m, t in patterns])}"
     return "No specific patterns detected"
 # Custom CSS for a softer, less blinding color scheme
 with gr.Blocks(title="Enhanced Dialect Translator", css=css, theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 🌍 Dialect Bengali → Actual Bengali → Benglish")
     gr.Markdown("Type a phrase in your dialect. The app uses both text and semantic matching to find similar phrases.")
     # Define input component first
     inp = gr.Textbox(label="Type phrase in Dialect Bengali", placeholder="e.g. Kita kobor? Sob bala asoin ni")
     with gr.Row():
         with gr.Column(scale=1):
             gr.Markdown("### Examples to try:")
             )
         with gr.Column(scale=2):
             btn = gr.Button("Translate / Find", variant="primary")
     with gr.Row():
         out_dialect = gr.Textbox(label="Dialect Bengali (Bengali Script)")
         out_actual = gr.Textbox(label="Actual Bengali (Standard)")
         out_benglish = gr.Textbox(label="Benglish (Phonetic English)")
     with gr.Row():
         semantic_info = gr.Textbox(label="Semantic Analysis", lines=2)
     suggestions = gr.Textbox(label="Status / Suggestions / Top Candidates", lines=8)
     # Set up event handlers
     btn.click(
+        fn=translate_text,
+        inputs=[inp],
         outputs=[out_dialect, out_actual, out_benglish, suggestions]
     )
     inp.change(
         fn=show_semantic_analysis,
         inputs=[inp],