saadustto2007 committed
Commit d59e3a5 · verified · 1 Parent(s): 2e10399

Update app.py

Files changed (1)
  1. app.py +22 -9
app.py CHANGED
@@ -62,6 +62,8 @@ common_phrases = {
     "See you later": "بعداً می‌بینمت",
     "What is this?": "این چیست؟",
     "I am happy": "خوشحالم",
+    "It is very chilly today": "امروز خیلی سرد است",
+    "I hope we have better weather tomorrow": "امیدوارم فردا هوا بهتر شود",
 }
 
 # Function to split text into smaller phrases
@@ -80,23 +82,31 @@ def transliterate_farsi_to_cyrillic(farsi_text):
     word_map = {
         "سلام": "Салом",
         "خداحافظ": "Худоҳафиз",
-        "شب بخیر": "Шаби хайр",
-        "صبح بخیر": "Субҳи хайр",
+        "شب بخیر": "Шаб хайр",
+        "صبح بخیر": "Субҳ хайр",
         "ممنون": "Ташаккур",
         "خواهش میکنم": "Илтимос",
-        "چطور هستی؟": "Чӣ хел ҳастӣ?",
-        "چطور هستید؟": "Шумо чӣ хелед?",
+        "چطور هستی؟": "Чӣ тур ҳастӣ?",
+        "چطور هستید؟": "Шумо чӣ туред?",
         "بله": "Ҳа",
         "نه": "Не",
         "ایران": "Эрон",
         "تشکر": "Ташаккур",
-        "فارسی": "Форсī",
+        "فارسی": "Форсӣ",
         "اسم من": "Номи ман",
-        "لطفا": "Илтимوс",
+        "لطفا": "Илтимос",
         "کمک": "Кумак",
-        "هستی": "ҳастī",
+        "هستی": "ҳастӣ",
         "هستید": "ҳастед",
         "است": "аст",
+        "امروز": "Имрӯз",
+        "خیلی": "Хеле",
+        "سرد": "Сард",
+        "امیدوارم": "Умидворам",
+        "فردا": "Фардо",
+        "هوا": "Ҳаво",
+        "بهتر": "Беҳтар",
+        "شود": "Шавад",
     }
 
     char_map = {
@@ -150,7 +160,7 @@ def transliterate_farsi_to_cyrillic(farsi_text):
 
     return " ".join(cyrillic_words)
 
-# Translation function with input validation
+# Translation function with input validation and cleaning
 def translate_to_cyrillic_farsi(text):
     if not text or not text.strip():
         return "Error: Please enter a valid English text.", ""
@@ -163,6 +173,9 @@ def translate_to_cyrillic_farsi(text):
     translated = model.generate(**encoded_text, forced_bos_token_id=tokenizer.get_lang_id("fa"))
     farsi_text = tokenizer.decode(translated[0], skip_special_tokens=True)
 
+    # Clean the Farsi text (remove leading/trailing unwanted punctuation)
+    farsi_text = farsi_text.strip(".!?, ")
+
     # Check if the translation is valid Farsi
     if not farsi_text or not any(c in "ابتثجحخدذرزسشصضطظعغفقکگلمنوهیءأؤئء،؟" for c in farsi_text.replace(" ", "")):
         # Fall back to phrase-by-phrase translation
@@ -175,7 +188,7 @@ def translate_to_cyrillic_farsi(text):
             tokenizer.src_lang = "en"
            encoded_text = tokenizer(phrase, return_tensors="pt", padding=True).to(device)
             translated = model.generate(**encoded_text, forced_bos_token_id=tokenizer.get_lang_id("fa"))
-            translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
+            translated_text = tokenizer.decode(translated[0], skip_special_tokens=True).strip(".!?, ")
             if any(c in "ابتثجحخدذرزسشصضطظعغفقکگلمنوهیءأؤئء،؟" for c in translated_text.replace(" ", "")):
                 farsi_translations.append(translated_text)
             else:
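
For context, a minimal, self-contained sketch of the translate → clean → validate flow these hunks touch. The tokenizer calls in app.py (src_lang, get_lang_id, forced_bos_token_id) match the M2M100 API in Hugging Face transformers, but the checkpoint name below is an assumption, and the real app falls back to phrase-by-phrase translation rather than raising an error.

# Minimal sketch (not the app's exact code): translate English -> Farsi with an
# M2M100-style model, strip stray edge punctuation as this commit does, then run
# the same Farsi-character sanity check. The checkpoint name is an assumption.
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer

FARSI_CHARS = "ابتثجحخدذرزسشصضطظعغفقکگلمنوهیءأؤئء،؟"

tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_418M")  # assumed checkpoint
model = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_418M")

def english_to_farsi(text: str) -> str:
    tokenizer.src_lang = "en"
    encoded = tokenizer(text, return_tensors="pt", padding=True)
    generated = model.generate(**encoded, forced_bos_token_id=tokenizer.get_lang_id("fa"))
    farsi = tokenizer.decode(generated[0], skip_special_tokens=True)
    # New in this commit: drop leading/trailing ". ! ? ," before validating
    farsi = farsi.strip(".!?, ")
    if not farsi or not any(c in FARSI_CHARS for c in farsi.replace(" ", "")):
        # app.py falls back to phrase-by-phrase translation here; the sketch just signals it
        raise ValueError("output does not look like Farsi")
    return farsi

print(english_to_farsi("I am happy"))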
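
Similarly, a rough illustration of how the expanded word_map is consumed: whole-word lookups come first, and anything unmapped falls through to a character-level table like app.py's char_map. The helper below is illustrative only; its name and fallback details are assumptions, not the repository's code.

# Illustrative only: whole-word lookup into a trimmed word_map, with a
# character-level fallback standing in for app.py's char_map.
WORD_MAP = {
    "امروز": "Имрӯз",
    "خیلی": "Хеле",
    "سرد": "Сард",
    "است": "аст",
}

def transliterate(farsi_text, char_map=None):
    char_map = char_map or {}
    cyrillic_words = []
    for word in farsi_text.split():
        if word in WORD_MAP:
            cyrillic_words.append(WORD_MAP[word])
        else:
            # unmapped word: transliterate character by character, keeping unknowns as-is
            cyrillic_words.append("".join(char_map.get(ch, ch) for ch in word))
    return " ".join(cyrillic_words)

print(transliterate("امروز خیلی سرد است"))  # -> Имрӯз Хеле Сард аст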