|
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer |
|
import gradio as gr |
|
import torch |
|
|
|
|
|
# Checkpoint identifier handed to both ``from_pretrained`` calls below.
model_name = "facebook/m2m100_418M"

# Load the tokenizer and model once at import time; the translation functions
# further down use these module-level objects directly.
try:
    tokenizer = M2M100Tokenizer.from_pretrained(model_name)
    model = M2M100ForConditionalGeneration.from_pretrained(model_name)
except Exception as e:
    print(f"Error loading model or tokenizer: {e}")
    # ``exit()`` is a helper injected by the ``site`` module for interactive
    # sessions and is not guaranteed to exist; raising SystemExit terminates
    # with the same status code without depending on it.
    raise SystemExit(1) from e

# Prefer the GPU when torch reports one, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
|
|
|
|
|
# Hand-written English -> Farsi phrase table. ``translate_to_cyrillic_farsi``
# consults it as a fallback when the model output contains no Farsi characters.
# Keys must be pure ASCII: that function rejects any input containing a
# non-ASCII character, so a key with e.g. a typographic apostrophe could never
# be matched.
common_phrases = {
    "Hello": "سلام",
    "Hi!": "سلام!",
    "Good morning": "صبح بخیر",
    "Good afternoon": "عصر بخیر",
    "Good evening": "شب بخیر",
    "Goodbye": "خداحافظ",
    "Good night": "شب خوش",
    "How are you?": "حالت چطوره؟",
    "I am fine, thank you. And you?": "خوبم، متشکرم. و شما؟",
    "Thank you (very much)": "متشکرم (خیلی ممنون)",
    "You're welcome": "خواهش میکنم",
    "Excuse me": "ببخشید",
    "Pardon me": "معذرت میخواهم",
    "I'm sorry": "متأسفم",
    "Congratulations": "تبریک میگویم",
    "Please sit down": "لطفاً بنشینید",
    "Good luck": "موفق باشید",
    "Have a good trip": "سفر خوبی داشته باشید",
    "What is your name?": "اسم شما چیست؟",
    "My name is Sara": "اسم من سارا است",
    "Where are you from?": "اهل کجا هستید؟",
    "I am from Iran": "من اهل ایران هستم",
    "Do you speak English?": "آیا انگلیسی صحبت میکنید؟",
    "I don't understand": "من متوجه نمیشوم",
    "Please speak slowly": "لطفاً آهسته صحبت کنید",
    "Do you have a Persian-English dictionary?": "آیا دیکشنری فارسی-انگلیسی دارید؟",
    "How do you say this in English?": "این را در انگلیسی چگونه میگویند؟",
    "How much is this?": "این چقدر قیمت دارد؟",
    "Where is the bathroom?": "دستشویی کجاست؟",
    "Help!": "کمک!",
    "I am lost": "من گم شدهام",
    "Can you help me?": "میتوانید به من کمک کنید؟",
    "What time is it?": "ساعت چند است؟",
    "Where is the hospital?": "بیمارستان کجاست؟",
    "I love you": "دوستت دارم",
    "How can I get to the airport?": "چطور میتوانم به فرودگاه بروم؟",
    "I need a doctor": "به یک پزشک نیاز دارم",
    "Where can I buy a ticket?": "از کجا میتوانم بلیط بخرم؟",
    "I am hungry": "گرسنهام",
    "Can I have some water?": "میتوانم کمی آب بگیرم؟",
    # ASCII apostrophe, matching "You're welcome" / "I'm sorry" above.
    "It's very beautiful": "خیلی زیباست",
    "See you later": "بعداً میبینمت",
    "What is this?": "این چیست؟",
    "I am happy": "خوشحالم",
    "It is very chilly today": "امروز خیلی سرد است",
    "I hope we have better weather tomorrow": "امیدوارم فردا هوا بهتر شود",
}
|
|
|
|
|
def split_into_phrases(text):
    """Split *text* into phrases at every ``,``, ``.``, ``?`` or ``!``.

    Args:
        text: The string to split. ``None`` or an empty string yields ``[]``.

    Returns:
        The pieces between separators, in their original order, each stripped
        of surrounding whitespace. Pieces that are empty after stripping are
        dropped, so runs of separators do not produce empty entries.
    """
    if not text:
        return []

    # Fold every separator onto "," in a single pass, then split once; this is
    # equivalent to splitting on each separator in turn.
    unified = text.translate(str.maketrans(".?!", ",,,"))
    stripped_pieces = (piece.strip() for piece in unified.split(","))
    return [piece for piece in stripped_pieces if piece]
|
|
|
|
|
# Whole words / short phrases with a fixed Cyrillic rendering. Multi-word keys
# only match when the entire input equals the key.
_WORD_MAP = {
    "سلام": "Салом",
    "خداحافظ": "Худоҳафиз",
    "شب بخیر": "Шаб хайр",
    "صبح بخیر": "Субҳ хайр",
    "ممنون": "Ташаккур",
    "خواهش میکنم": "Илтимос",
    "چطور هستی؟": "Чӣ тур ҳастӣ?",
    "چطور هستید؟": "Шумо чӣ туред?",
    "بله": "Ҳа",
    "نه": "Не",
    "ایران": "Эрон",
    "تشکر": "Ташаккур",
    "فارسی": "Форсӣ",
    "اسم من": "Номи ман",
    "لطفا": "Илтимос",
    "کمک": "Кумак",
    "هستی": "ҳастӣ",
    "هستید": "ҳастед",
    "است": "аст",
    "امروز": "Имрӯз",
    "خیلی": "Хеле",
    "سرد": "Сард",
    "امیدوارم": "Умидворам",
    "فردا": "Фардо",
    "هوا": "Ҳаво",
    "بهتر": "Беҳтар",
    "شود": "Шавад",
}

# Character-level fallback for words that are not in _WORD_MAP. Characters
# absent from this table are passed through unchanged.
_CHAR_MAP = {
    "ا": "а",
    "آ": "о",
    "ب": "б",
    "پ": "п",
    "ت": "т",
    "ث": "с",
    # Tajik Cyrillic writes this letter as "ҷ"; "ж" belongs to "ژ" below.
    "ج": "ҷ",
    "چ": "ч",
    "ح": "ҳ",
    "خ": "х",
    "د": "д",
    "ذ": "з",
    "ر": "р",
    "ز": "з",
    "ژ": "ж",
    "س": "с",
    "ش": "ш",
    "ص": "с",
    "ض": "з",
    "ط": "т",
    "ظ": "з",
    "ع": "ъ",
    "غ": "ғ",
    "ف": "ф",
    "ق": "қ",
    "ک": "к",
    "ك": "к",  # Arabic kaf variant
    "گ": "г",
    "ل": "л",
    "م": "м",
    "ن": "н",
    "و": "в",
    "ه": "ҳ",
    "ی": "й",
    "ي": "й",  # Arabic yeh variant
    "؟": "?",
    "،": ",",
    "؛": ";",
    " ": " ",
}

# Letters whose presence marks a token as Farsi text worth transliterating:
# the original detection set plus the letters it omitted.
_FARSI_LETTERS = frozenset(
    "ابتثجحخدذرزسشصضطظعغفقکگلمنوهیءأؤئء"
    "پچژآيك"
)

# Punctuation that may be glued to either end of a word.
_EDGE_PUNCTUATION = ".,!?:;()[]\"'«»،؛؟"


def _transliterate_chars(text):
    """Map each character through _CHAR_MAP, keeping unmapped ones as-is."""
    return "".join(_CHAR_MAP.get(ch, ch) for ch in text)


def _transliterate_token(token):
    """Convert one whitespace-delimited token to Cyrillic."""
    if token in _WORD_MAP:
        return _WORD_MAP[token]

    # Retry the word lookup without surrounding punctuation so that e.g.
    # "سلام!" still resolves through the word map, then re-attach the
    # (transliterated) punctuation on both sides.
    without_prefix = token.lstrip(_EDGE_PUNCTUATION)
    prefix = token[: len(token) - len(without_prefix)]
    core = without_prefix.rstrip(_EDGE_PUNCTUATION)
    suffix = without_prefix[len(core):]
    if core in _WORD_MAP:
        return (
            _transliterate_chars(prefix)
            + _WORD_MAP[core]
            + _transliterate_chars(suffix)
        )

    # Character-level fallback. Tokens of one or two characters are left
    # untouched, as are tokens without any Farsi letter.
    if len(token) > 2 and any(ch in _FARSI_LETTERS for ch in token):
        return _transliterate_chars(token)
    return token


def transliterate_farsi_to_cyrillic(farsi_text):
    """Render Farsi text written in Perso-Arabic script in Cyrillic letters.

    The whole input is first looked up in a fixed word/phrase table. Otherwise
    the input is split on whitespace and each token is converted separately:
    via the word table when the token (with or without surrounding
    punctuation) is listed there, else character by character.

    Args:
        farsi_text: Text to convert. ``None`` or an empty string yields ``""``.

    Returns:
        The converted tokens joined by single spaces. Tokens of at most two
        characters that are not in the word table are returned unchanged.
    """
    if not farsi_text:
        return ""
    if farsi_text in _WORD_MAP:
        return _WORD_MAP[farsi_text]
    return " ".join(_transliterate_token(token) for token in farsi_text.split())
|
|
|
|
|
# A model output counts as Farsi when it contains at least one of these
# characters: the original detection set plus the letters it omitted.
_FARSI_DETECTION_CHARS = frozenset(
    "ابتثجحخدذرزسشصضطظعغفقکگلمنوهیءأؤئء،؟"
    "پچژآ"
)

# Characters trimmed from both ends of decoded model output and ignored at the
# ends of phrases when matching against ``common_phrases``.
_OUTPUT_EDGE_CHARS = ".!?, "


def _contains_farsi(text):
    """Return True when *text* holds at least one Farsi detection character."""
    return any(ch in _FARSI_DETECTION_CHARS for ch in text)


def _translate_en_to_fa(english_text):
    """Translate one English string with the module-level tokenizer/model."""
    tokenizer.src_lang = "en"
    encoded = tokenizer(english_text, return_tensors="pt", padding=True).to(device)
    generated = model.generate(
        **encoded, forced_bos_token_id=tokenizer.get_lang_id("fa")
    )
    decoded = tokenizer.decode(generated[0], skip_special_tokens=True)
    return decoded.strip(_OUTPUT_EDGE_CHARS)


def _normalize_phrase(phrase):
    """Reduce a phrase to the form used for tolerant phrase-table matching."""
    return phrase.replace("\u2019", "'").strip(_OUTPUT_EDGE_CHARS).casefold()


def _lookup_common_phrase(phrase):
    """Find *phrase* in ``common_phrases``; return its Farsi text or None.

    An exact key match wins. Otherwise keys are compared ignoring case, edge
    punctuation and apostrophe style: ``split_into_phrases`` removes ``,.?!``
    from its pieces, so keys such as "Hi!" or "How are you?" would never match
    an exact lookup.
    """
    if phrase in common_phrases:
        return common_phrases[phrase]
    wanted = _normalize_phrase(phrase)
    if not wanted:
        return None
    for key, farsi in common_phrases.items():
        if _normalize_phrase(key) == wanted:
            return farsi
    return None


def translate_to_cyrillic_farsi(text):
    """Translate English *text* to Farsi and transliterate it to Cyrillic.

    The model translates the full input first. If that output contains no
    Farsi characters, the input is resolved through ``common_phrases`` (whole
    input first, then phrase by phrase); phrases not found there are sent to
    the model individually and marked ``[UNTRANSLATED: ...]`` when that also
    yields no Farsi.

    Args:
        text: English input; must be non-blank and ASCII-only.

    Returns:
        A ``(farsi_text, cyrillic_text)`` tuple. On invalid input the first
        element is an error message and the second is ``""``.
    """
    if not text or not text.strip():
        return "Error: Please enter a valid English text.", ""
    if not all(ord(char) < 128 for char in text):
        return "Error: Please enter text in English (ASCII characters only).", ""

    farsi_text = _translate_en_to_fa(text)

    if not _contains_farsi(farsi_text):
        whole_match = _lookup_common_phrase(text)
        if whole_match is not None:
            # Covers keys with internal punctuation, which phrase splitting
            # would otherwise break apart.
            farsi_text = whole_match
        else:
            translations = []
            for phrase in split_into_phrases(text):
                rendered = _lookup_common_phrase(phrase)
                if rendered is None:
                    candidate = _translate_en_to_fa(phrase)
                    if _contains_farsi(candidate):
                        rendered = candidate
                    else:
                        rendered = f"[UNTRANSLATED: {phrase}]"
                translations.append(rendered)
            farsi_text = " ".join(translations)

    cyrillic_text = transliterate_farsi_to_cyrillic(farsi_text)
    return farsi_text, cyrillic_text
|
|
|
|
|
# UI components: one English input box and two read-only result boxes, in the
# same order as the tuple returned by translate_to_cyrillic_farsi.
_english_input = gr.Textbox(label="Enter Text in English")
_native_output = gr.Textbox(label="Farsi Translation (Native Script)")
_cyrillic_output = gr.Textbox(label="Farsi Translation (Cyrillic Script)")

# Gradio app wiring the translation function to the components above.
interface = gr.Interface(
    fn=translate_to_cyrillic_farsi,
    inputs=_english_input,
    outputs=[_native_output, _cyrillic_output],
    title="English to Cyrillic Farsi Translator",
    description="Enter an English word or sentence, and this tool will translate it to Farsi in both native and Cyrillic scripts.",
)

# Start the web UI only when the file is executed as a script.
if __name__ == "__main__":
    interface.launch()