Nemo
Upload app.py
5f40da0 verified
raw
history blame
2.12 kB
import gradio as gr
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
# Load model and tokenizer
# The model is expected to live in a local "./model" directory (a fine-tuned
# seq2seq checkpoint — presumably an NLLB/M2M100-style translation model,
# given the forced-BOS language tokens used below; TODO confirm).
model_name = "./model"
# use_fast=False forces the slow (SentencePiece-based) tokenizer — presumably
# required because this checkpoint ships no fast-tokenizer files; verify.
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
# Check if CUDA (GPU) is available, otherwise fall back to CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
# Define lang code to token ID mapping manually
# Maps a language code to the special vocabulary token that forces the
# decoder to generate in that language (double-underscore format, as used
# by M2M100-style tokenizers — NOTE(review): confirm this matches the
# tokens actually present in ./model's vocabulary).
lang_code_to_token = {
"eng_Latn": "__eng_Latn__",
"deu_Latn": "__deu_Latn__",
"fra_Latn": "__fra_Latn__",
"spa_Latn": "__spa_Latn__"
}
# Choose target language (change as needed)
target_lang_code = "eng_Latn" # ← change to "deu_Latn" for German
# NOTE(review): "mya" is not in the lang_code_to_token format above
# (NLLB-style would be "mya_Mymr") — confirm the tokenizer accepts it.
src_lang_code = "mya" # Burmese
def translate(text, target_lang=None):
    """Translate Burmese text into the requested target language.

    Args:
        text: Source text (Burmese) to translate.
        target_lang: Optional language code; must be a key of
            ``lang_code_to_token``. When ``None`` (the default, which keeps
            the original single-argument call signature working), the
            module-level ``target_lang_code`` is used.

    Returns:
        The translated string, or a human-readable error message for empty
        input, an unsupported language code, or a language token missing
        from the tokenizer vocabulary.
    """
    if not text.strip():
        return "Please enter some Burmese text."
    # Resolve the target language lazily so the module-level default can be
    # changed at runtime and the signature stays backward-compatible.
    if target_lang is None:
        target_lang = target_lang_code
    if target_lang not in lang_code_to_token:
        return f"Unsupported target language: {target_lang}"
    # Set source language for tokenizer
    tokenizer.src_lang = src_lang_code
    # Tokenize input (truncate long inputs so generation stays bounded)
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512).to(device)
    # Look up the forced BOS token ID for the target language.
    lang_token = lang_code_to_token[target_lang]
    forced_bos_token_id = tokenizer.convert_tokens_to_ids(lang_token)
    # Guard: convert_tokens_to_ids maps unknown tokens to the <unk> id (or
    # None), which would silently generate in the wrong language — fail
    # loudly with a message instead.
    if forced_bos_token_id is None or forced_bos_token_id == tokenizer.unk_token_id:
        return f"Language token {lang_token!r} not found in tokenizer vocabulary."
    # Generate translation; no_grad avoids building an autograd graph.
    with torch.no_grad():
        generated_tokens = model.generate(
            **inputs,
            forced_bos_token_id=forced_bos_token_id,
            max_length=128
        )
    return tokenizer.decode(generated_tokens[0], skip_special_tokens=True)
# Build the Gradio front end: a source textbox plus button on the left,
# a read-only result textbox on the right.
with gr.Blocks(title="Burmese to English Translator") as demo:
    gr.Markdown("## 🇲🇲 ➡️ 🇬🇧 Burmese to English Translator")
    with gr.Row():
        with gr.Column():
            source_box = gr.Textbox(label="Burmese Text", placeholder="Type Burmese here...", lines=4)
            translate_btn = gr.Button("Translate")
        with gr.Column():
            result_box = gr.Textbox(label="Translation", interactive=False)
    # Wire the button to the translation function.
    translate_btn.click(translate, inputs=source_box, outputs=result_box)
demo.launch()