Update app.py
Browse files
app.py
CHANGED
@@ -62,6 +62,8 @@ common_phrases = {
|
|
62 |
"See you later": "بعداً میبینمت",
|
63 |
"What is this?": "این چیست؟",
|
64 |
"I am happy": "خوشحالم",
|
|
|
|
|
65 |
}
|
66 |
|
67 |
# Function to split text into smaller phrases
|
@@ -80,23 +82,31 @@ def transliterate_farsi_to_cyrillic(farsi_text):
|
|
80 |
word_map = {
|
81 |
"سلام": "Салом",
|
82 |
"خداحافظ": "Худоҳафиз",
|
83 |
-
"شب بخیر": "
|
84 |
-
"صبح بخیر": "
|
85 |
"ممنون": "Ташаккур",
|
86 |
"خواهش میکنم": "Илтимос",
|
87 |
-
"چطور هستی؟": "Чӣ
|
88 |
-
"چطور هستید؟": "Шумо чӣ
|
89 |
"بله": "Ҳа",
|
90 |
"نه": "Не",
|
91 |
"ایران": "Эрон",
|
92 |
"تشکر": "Ташаккур",
|
93 |
-
"فارسی": "
|
94 |
"اسم من": "Номи ман",
|
95 |
-
"لطفا": "
|
96 |
"کمک": "Кумак",
|
97 |
-
"هستی": "
|
98 |
"هستید": "ҳастед",
|
99 |
"است": "аст",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
100 |
}
|
101 |
|
102 |
char_map = {
|
@@ -150,7 +160,7 @@ def transliterate_farsi_to_cyrillic(farsi_text):
|
|
150 |
|
151 |
return " ".join(cyrillic_words)
|
152 |
|
153 |
-
# Translation function with input validation
|
154 |
def translate_to_cyrillic_farsi(text):
|
155 |
if not text or not text.strip():
|
156 |
return "Error: Please enter a valid English text.", ""
|
@@ -163,6 +173,9 @@ def translate_to_cyrillic_farsi(text):
|
|
163 |
translated = model.generate(**encoded_text, forced_bos_token_id=tokenizer.get_lang_id("fa"))
|
164 |
farsi_text = tokenizer.decode(translated[0], skip_special_tokens=True)
|
165 |
|
|
|
|
|
|
|
166 |
# Check if the translation is valid Farsi
|
167 |
if not farsi_text or not any(c in "ابتثجحخدذرزسشصضطظعغفقکگلمنوهیءأؤئء،؟" for c in farsi_text.replace(" ", "")):
|
168 |
# Fall back to phrase-by-phrase translation
|
@@ -175,7 +188,7 @@ def translate_to_cyrillic_farsi(text):
|
|
175 |
tokenizer.src_lang = "en"
|
176 |
encoded_text = tokenizer(phrase, return_tensors="pt", padding=True).to(device)
|
177 |
translated = model.generate(**encoded_text, forced_bos_token_id=tokenizer.get_lang_id("fa"))
|
178 |
-
translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
|
179 |
if any(c in "ابتثجحخدذرزسشصضطظعغفقکگلمنوهیءأؤئء،؟" for c in translated_text.replace(" ", "")):
|
180 |
farsi_translations.append(translated_text)
|
181 |
else:
|
|
|
62 |
"See you later": "بعداً میبینمت",
|
63 |
"What is this?": "این چیست؟",
|
64 |
"I am happy": "خوشحالم",
|
65 |
+
"It is very chilly today": "امروز خیلی سرد است",
|
66 |
+
"I hope we have better weather tomorrow": "امیدوارم فردا هوا بهتر شود",
|
67 |
}
|
68 |
|
69 |
# Function to split text into smaller phrases
|
|
|
82 |
word_map = {
|
83 |
"سلام": "Салом",
|
84 |
"خداحافظ": "Худоҳафиз",
|
85 |
+
"شب بخیر": "Шаб хайр",
|
86 |
+
"صبح بخیر": "Субҳ хайр",
|
87 |
"ممنون": "Ташаккур",
|
88 |
"خواهش میکنم": "Илтимос",
|
89 |
+
"چطور هستی؟": "Чӣ тур ҳастӣ?",
|
90 |
+
"چطور هستید؟": "Шумо чӣ туред?",
|
91 |
"بله": "Ҳа",
|
92 |
"نه": "Не",
|
93 |
"ایران": "Эрон",
|
94 |
"تشکر": "Ташаккур",
|
95 |
+
"فارسی": "Форсӣ",
|
96 |
"اسم من": "Номи ман",
|
97 |
+
"لطفا": "Илтимос",
|
98 |
"کمک": "Кумак",
|
99 |
+
"هستی": "ҳастӣ",
|
100 |
"هستید": "ҳастед",
|
101 |
"است": "аст",
|
102 |
+
"امروز": "Имрӯз",
|
103 |
+
"خیلی": "Хеле",
|
104 |
+
"سرد": "Сард",
|
105 |
+
"امیدوارم": "Умидворам",
|
106 |
+
"فردا": "Фардо",
|
107 |
+
"هوا": "Ҳаво",
|
108 |
+
"بهتر": "Беҳтар",
|
109 |
+
"شود": "Шавад",
|
110 |
}
|
111 |
|
112 |
char_map = {
|
|
|
160 |
|
161 |
return " ".join(cyrillic_words)
|
162 |
|
163 |
+
# Translation function with input validation and cleaning
|
164 |
def translate_to_cyrillic_farsi(text):
|
165 |
if not text or not text.strip():
|
166 |
return "Error: Please enter a valid English text.", ""
|
|
|
173 |
translated = model.generate(**encoded_text, forced_bos_token_id=tokenizer.get_lang_id("fa"))
|
174 |
farsi_text = tokenizer.decode(translated[0], skip_special_tokens=True)
|
175 |
|
176 |
+
# Clean the Farsi text (remove leading/trailing unwanted punctuation)
|
177 |
+
farsi_text = farsi_text.strip(".!?, ")
|
178 |
+
|
179 |
# Check if the translation is valid Farsi
|
180 |
if not farsi_text or not any(c in "ابتثجحخدذرزسشصضطظعغفقکگلمنوهیءأؤئء،؟" for c in farsi_text.replace(" ", "")):
|
181 |
# Fall back to phrase-by-phrase translation
|
|
|
188 |
tokenizer.src_lang = "en"
|
189 |
encoded_text = tokenizer(phrase, return_tensors="pt", padding=True).to(device)
|
190 |
translated = model.generate(**encoded_text, forced_bos_token_id=tokenizer.get_lang_id("fa"))
|
191 |
+
translated_text = tokenizer.decode(translated[0], skip_special_tokens=True).strip(".!?, ")
|
192 |
if any(c in "ابتثجحخدذرزسشصضطظعغفقکگلمنوهیءأؤئء،؟" for c in translated_text.replace(" ", "")):
|
193 |
farsi_translations.append(translated_text)
|
194 |
else:
|