# Hugging Face Space file "app.py", uploaded by saadustto2007.
# Commit d59e3a5 (verified): "Update app.py"
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
import gradio as gr
import torch
# Define the model: load the pretrained M2M100 multilingual translation
# checkpoint and its tokenizer once, at import time.
model_name = "facebook/m2m100_418M"
try:
    tokenizer = M2M100Tokenizer.from_pretrained(model_name)
    model = M2M100ForConditionalGeneration.from_pretrained(model_name)
except Exception as e:
    # The app is useless without the model, so report the failure and stop
    # with a non-zero status. SystemExit is raised directly because the
    # `exit()` helper is injected by the `site` module for interactive use
    # and is not guaranteed to exist in every interpreter configuration.
    print(f"Error loading model or tokenizer: {e}")
    raise SystemExit(1) from e

# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
# Predefined common English-to-Farsi phrase mappings.
# Keys must be plain ASCII: the translator rejects any input containing
# non-ASCII characters, so a key with e.g. a typographic apostrophe could
# never be matched by user input.
common_phrases = {
    "Hello": "سلام",
    "Hi!": "سلام!",
    "Good morning": "صبح بخیر",
    "Good afternoon": "عصر بخیر",
    "Good evening": "شب بخیر",
    "Goodbye": "خداحافظ",
    "Good night": "شب خوش",
    "How are you?": "حالت چطوره؟",
    "I am fine, thank you. And you?": "خوبم، متشکرم. و شما؟",
    "Thank you (very much)": "متشکرم (خیلی ممنون)",
    "You're welcome": "خواهش میکنم",
    "Excuse me": "ببخشید",
    "Pardon me": "معذرت می‌خواهم",
    "I'm sorry": "متأسفم",
    "Congratulations": "تبریک می‌گویم",
    "Please sit down": "لطفاً بنشینید",
    "Good luck": "موفق باشید",
    "Have a good trip": "سفر خوبی داشته باشید",
    "What is your name?": "اسم شما چیست؟",
    "My name is Sara": "اسم من سارا است",
    "Where are you from?": "اهل کجا هستید؟",
    "I am from Iran": "من اهل ایران هستم",
    "Do you speak English?": "آیا انگلیسی صحبت می‌کنید؟",
    "I don't understand": "من متوجه نمی‌شوم",
    "Please speak slowly": "لطفاً آهسته صحبت کنید",
    "Do you have a Persian-English dictionary?": "آیا دیکشنری فارسی-انگلیسی دارید؟",
    "How do you say this in English?": "این را در انگلیسی چگونه می‌گویند؟",
    "How much is this?": "این چقدر قیمت دارد؟",
    "Where is the bathroom?": "دستشویی کجاست؟",
    "Help!": "کمک!",
    "I am lost": "من گم شده‌ام",
    "Can you help me?": "می‌توانید به من کمک کنید؟",
    "What time is it?": "ساعت چند است؟",
    "Where is the hospital?": "بیمارستان کجاست؟",
    "I love you": "دوستت دارم",
    "How can I get to the airport?": "چطور می‌توانم به فرودگاه بروم؟",
    "I need a doctor": "به یک پزشک نیاز دارم",
    "Where can I buy a ticket?": "از کجا می‌توانم بلیط بخرم؟",
    "I am hungry": "گرسنه‌ام",
    "Can I have some water?": "می‌توانم کمی آب بگیرم؟",
    # ASCII apostrophe (was U+2019), so that ASCII-only input can match it.
    "It's very beautiful": "خیلی زیباست",
    "See you later": "بعداً می‌بینمت",
    "What is this?": "این چیست؟",
    "I am happy": "خوشحالم",
    "It is very chilly today": "امروز خیلی سرد است",
    "I hope we have better weather tomorrow": "امیدوارم فردا هوا بهتر شود",
}
# Function to split text into smaller phrases
def split_into_phrases(text):
    """Break *text* at every ',', '.', '?' or '!' and return the pieces.

    Each piece is stripped of surrounding whitespace; pieces that end up
    empty are dropped. The separators themselves are not kept.
    """
    # Collapse all four separators onto one so a single split suffices.
    unified = text
    for mark in (".", "?", "!"):
        unified = unified.replace(mark, ",")
    trimmed = (chunk.strip() for chunk in unified.split(","))
    return [chunk for chunk in trimmed if chunk]
# Improved transliteration function (Farsi to Cyrillic)

# Whole words and short phrases with a known (Tajik-style) Cyrillic spelling.
# Built once at import time instead of on every call.
_FA_CYR_WORD_MAP = {
    "سلام": "Салом",
    "خداحافظ": "Худоҳафиз",
    "شب بخیر": "Шаб хайр",
    "صبح بخیر": "Субҳ хайр",
    "ممنون": "Ташаккур",
    "خواهش میکنم": "Илтимос",
    "چطور هستی؟": "Чӣ тур ҳастӣ?",
    "چطور هستید؟": "Шумо чӣ туред?",
    "بله": "Ҳа",
    "نه": "Не",
    "ایران": "Эрон",
    "تشکر": "Ташаккур",
    "فارسی": "Форсӣ",
    "اسم من": "Номи ман",
    "لطفا": "Илтимос",
    "کمک": "Кумак",
    "هستی": "ҳастӣ",
    "هستید": "ҳастед",
    "است": "аст",
    "امروز": "Имрӯз",
    "خیلی": "Хеле",
    "سرد": "Сард",
    "امیدوارم": "Умидворам",
    "فردا": "Фардо",
    "هوا": "Ҳаво",
    "بهتر": "Беҳтар",
    "شود": "Шавад",
}

# Letter-by-letter fallback. Persian script does not write short vowels, so
# this is only a consonantal approximation for words missing from the map.
_FA_CYR_CHAR_MAP = {
    "ا": "а",
    "آ": "о",
    "أ": "а",
    "ب": "б",
    "پ": "п",
    "ت": "т",
    "ث": "с",
    "ج": "ҷ",  # Tajik writes this sound as ҷ; ж belongs to ژ
    "چ": "ч",
    "ح": "ҳ",
    "خ": "х",
    "د": "д",
    "ذ": "з",
    "ر": "р",
    "ز": "з",
    "ژ": "ж",
    "س": "с",
    "ش": "ш",
    "ص": "с",
    "ض": "з",
    "ط": "т",
    "ظ": "з",
    "ع": "ъ",
    "ء": "ъ",
    "ئ": "ъ",
    "ؤ": "в",
    "غ": "ғ",
    "ف": "ф",
    "ق": "қ",
    "ک": "к",
    "\u0643": "к",  # Arabic kaf, sometimes emitted instead of Persian keheh
    "گ": "г",
    "ل": "л",
    "م": "м",
    "ن": "н",
    "و": "в",
    "ه": "ҳ",
    "ی": "й",
    "\u064a": "й",  # Arabic yeh, sometimes emitted instead of Farsi yeh
    "؟": "?",
    "،": ",",
    "\u061b": ";",  # Arabic semicolon
    " ": " ",
    "\u200c": "",  # zero-width non-joiner: invisible, has no Cyrillic meaning
    "\u0640": "",  # tatweel (purely typographic elongation)
}
# Arabic short-vowel / tanwin diacritics (U+064B..U+0652) are dropped.
_FA_CYR_CHAR_MAP.update({chr(cp): "" for cp in range(0x064B, 0x0653)})

# Characters that are removed before a dictionary lookup so that e.g. a word
# carrying a diacritic still matches its plain dictionary spelling.
_FA_CYR_IGNORED = frozenset(
    ch for ch, out in _FA_CYR_CHAR_MAP.items() if out == ""
)

# Punctuation that may be glued to the start or end of a word.
_FA_CYR_PUNCTUATION = ".,!?:;\"'()[]«»\u061f\u060c\u061b"

# Longest dictionary entry, in words; bounds the phrase-matching window.
_FA_CYR_MAX_PHRASE_WORDS = max(len(key.split()) for key in _FA_CYR_WORD_MAP)


def _fa_cyr_chars(text):
    """Transliterate *text* character by character; unknown characters pass through."""
    return "".join(_FA_CYR_CHAR_MAP.get(ch, ch) for ch in text)


def _fa_cyr_lookup(chunk):
    """Return the dictionary rendering of *chunk*, or None if it is not listed.

    The chunk is tried verbatim first, then with leading/trailing punctuation
    peeled off (the punctuation is transliterated and re-attached).
    """
    cleaned = "".join(ch for ch in chunk if ch not in _FA_CYR_IGNORED)
    if cleaned in _FA_CYR_WORD_MAP:
        return _FA_CYR_WORD_MAP[cleaned]
    core = cleaned.strip(_FA_CYR_PUNCTUATION)
    if core not in _FA_CYR_WORD_MAP:
        return None
    lead_len = len(cleaned) - len(cleaned.lstrip(_FA_CYR_PUNCTUATION))
    lead = cleaned[:lead_len]
    trail = cleaned[lead_len + len(core):]
    return _fa_cyr_chars(lead) + _FA_CYR_WORD_MAP[core] + _fa_cyr_chars(trail)


def transliterate_farsi_to_cyrillic(farsi_text: str) -> str:
    """Convert Farsi text in Perso-Arabic script to Cyrillic script.

    The text is split on whitespace. At each position the longest run of
    words found in the phrase dictionary is replaced by its dictionary
    spelling; any other word is transliterated letter by letter. Characters
    with no mapping (Latin letters, digits, ...) are kept unchanged, so
    markers such as ``[UNTRANSLATED: ...]`` survive intact.

    Args:
        farsi_text: Text to transliterate. Empty or ``None`` yields ``""``.

    Returns:
        The transliterated words joined by single spaces.
    """
    if not farsi_text:
        return ""

    tokens = farsi_text.split()
    rendered = []
    pos = 0
    while pos < len(tokens):
        longest = min(_FA_CYR_MAX_PHRASE_WORDS, len(tokens) - pos)
        # Prefer the longest dictionary phrase starting at this token.
        for size in range(longest, 0, -1):
            hit = _fa_cyr_lookup(" ".join(tokens[pos:pos + size]))
            if hit is not None:
                rendered.append(hit)
                pos += size
                break
        else:
            rendered.append(_fa_cyr_chars(tokens[pos]))
            pos += 1
    return " ".join(rendered)
# Translation function with input validation and cleaning
def _contains_farsi(text):
    """Return True if *text* has any character in the Arabic Unicode block.

    A range test is used instead of a hand-written letter list so that
    letters such as پ چ ژ آ are not accidentally left out.
    """
    return any("\u0600" <= ch <= "\u06ff" for ch in text)


def _translate_en_to_fa(text):
    """Translate English *text* to Farsi with the M2M100 model.

    Leading/trailing '.', '!', '?', ',' and spaces are removed from the
    decoded output.
    """
    tokenizer.src_lang = "en"
    encoded = tokenizer(text, return_tensors="pt", padding=True).to(device)
    generated = model.generate(
        **encoded, forced_bos_token_id=tokenizer.get_lang_id("fa")
    )
    decoded = tokenizer.decode(generated[0], skip_special_tokens=True)
    return decoded.strip(".!?, ")


def _phrase_key(phrase):
    """Normalize an English phrase for dictionary lookup.

    Outer punctuation and case are ignored, so "How are you" (as produced by
    the phrase splitter) still matches the key "How are you?".
    """
    return phrase.strip(" .,!?").casefold()


def translate_to_cyrillic_farsi(text):
    """Translate English *text* to Farsi and transliterate it to Cyrillic.

    The whole input is given to the model first. If the output contains no
    Farsi characters, the function falls back to the predefined phrase table
    (whole input first, then phrase by phrase), translating unknown phrases
    individually and marking those that still fail as ``[UNTRANSLATED: ...]``.

    Args:
        text: English input; must be non-blank and ASCII-only.

    Returns:
        A ``(farsi_text, cyrillic_text)`` tuple. On invalid input the first
        element is an error message and the second is an empty string.
    """
    if not text or not text.strip():
        return "Error: Please enter a valid English text.", ""
    if not all(ord(char) < 128 for char in text):
        return "Error: Please enter text in English (ASCII characters only).", ""

    # Try full sentence translation first
    farsi_text = _translate_en_to_fa(text)

    # An empty result also counts as "not valid Farsi".
    if not _contains_farsi(farsi_text):
        known = {_phrase_key(key): value for key, value in common_phrases.items()}
        whole_match = known.get(_phrase_key(text))
        if whole_match is not None:
            # Covers table entries that themselves contain separators.
            farsi_text = whole_match
        else:
            # Fall back to phrase-by-phrase translation
            farsi_translations = []
            for phrase in split_into_phrases(text):
                candidate = known.get(_phrase_key(phrase))
                if candidate is None:
                    candidate = _translate_en_to_fa(phrase)
                    if not _contains_farsi(candidate):
                        candidate = f"[UNTRANSLATED: {phrase}]"
                farsi_translations.append(candidate)
            farsi_text = " ".join(farsi_translations)

    cyrillic_text = transliterate_farsi_to_cyrillic(farsi_text)
    return farsi_text, cyrillic_text
# Gradio Interface: one English input box wired to the translator, which
# fills two output boxes (native-script Farsi and its Cyrillic rendering).
interface = gr.Interface(
    title="English to Cyrillic Farsi Translator",
    description="Enter an English word or sentence, and this tool will translate it to Farsi in both native and Cyrillic scripts.",
    fn=translate_to_cyrillic_farsi,
    inputs=gr.Textbox(label="Enter Text in English"),
    outputs=[
        gr.Textbox(label="Farsi Translation (Native Script)"),
        gr.Textbox(label="Farsi Translation (Cyrillic Script)"),
    ],
)

# Start the web UI only when this file is run as a script, not on import.
if __name__ == "__main__":
    interface.launch()