# Finetuned-NLLB / app.py
# Hugging Face Spaces page header (not part of the program): uploaded by ayymen,
# commit 5e18776 "Update app.py" (verified), file size 1.7 kB.
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
# Hub identifier of the checkpoint; used for both the model and its tokenizer.
MODEL_NAME = "ayymen/NLLB-200-600M-Tamazight-All-Data"

# Language codes offered in the UI dropdowns and handed to the tokenizer as
# source / target language tags.
LANGS = [
    "tzm_Tfng",
    "kab_Latn",
    "taq_Tfng",
    "taq_Latn",
    "eng_Latn",
    "cat_Latn",
    "spa_Latn",
    "fra_Latn",
    "ary_Arab",
]

# Use CUDA when torch reports it as available, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Load the seq2seq weights first, then move them onto the selected device.
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
model = model.to(device)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
def translate(text, source_lang, target_lang, max_length=238):
    """Translate ``text`` from ``source_lang`` into ``target_lang``.

    Args:
        text: The text to translate.
        source_lang: Language code of the input; assigned to the tokenizer's
            ``src_lang`` before tokenizing.
        target_lang: Language code of the output; its token id is forced as
            the first generated token.
        max_length: Maximum length of the generated token sequence, forwarded
            to ``model.generate``.

    Returns:
        The decoded translation with special tokens stripped. Returns an
        empty string when ``text`` is empty or whitespace-only, and an
        ``"Error: ..."`` message when a language has not been provided.
    """
    print(text)

    # Nothing to translate: skip the model call instead of letting it
    # generate output from special tokens alone.
    if not text or not text.strip():
        return ""

    # A dropdown left unselected arrives as None. Without these guards the
    # tokenizer would be configured with no source language, or generation
    # would run with no forced target-language token.
    if not source_lang:
        return "Error: the source language is incorrect"
    if not target_lang:
        return "Error: the target language is incorrect"

    tokenizer.src_lang = source_lang
    inputs = tokenizer(text, return_tensors="pt").to(model.device)
    translated_tokens = model.generate(
        **inputs,
        forced_bos_token_id=tokenizer.convert_tokens_to_ids(target_lang),
        # Previously accepted by the function but never applied.
        max_length=max_length,
    )
    return tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
# Input widgets, in the positional order expected by translate().
# Only three inputs are wired up; a "Max Length" slider was previously
# sketched here but is disabled, so translate() runs with its default
# max_length.
text_input = gr.components.Textbox(label="Text")
source_dropdown = gr.components.Dropdown(label="Source Language", choices=LANGS)
target_dropdown = gr.components.Dropdown(label="Target Language", choices=LANGS)

# Single output box that shows the string returned by translate().
translation_output = gr.components.Textbox(label="Translated text")

gradio_ui = gr.Interface(
    fn=translate,
    title="NLLB Tamazight Translation Demo",
    inputs=[text_input, source_dropdown, target_dropdown],
    outputs=translation_output,
)

# Start the web app as soon as the script runs.
gradio_ui.launch()