# Spaces: Running
import gradio as gr | |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM | |
import torch | |
# Hugging Face Hub identifier of the checkpoint loaded below.
MODEL_NAME = "ayymen/NLLB-200-600M-Tamazight-All-Data"

# Language codes offered in the source/target dropdowns of the UI.
LANGS = [
    "tzm_Tfng",
    "kab_Latn",
    "taq_Tfng",
    "taq_Latn",
    "eng_Latn",
    "cat_Latn",
    "spa_Latn",
    "fra_Latn",
    "ary_Arab",
]

# Use the GPU when one is visible to torch, otherwise run on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Load the tokenizer and the seq2seq model once at import time, then move the
# model's weights to the selected device.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
model = model.to(device)
def translate(text, source_lang, target_lang, max_length=238):
    """Translate ``text`` from ``source_lang`` to ``target_lang``.

    Relies on the module-level ``tokenizer`` and ``model``.

    Args:
        text: The text to translate.
        source_lang: Language code of the input (e.g. ``"eng_Latn"``);
            assigned to ``tokenizer.src_lang`` before tokenizing.
        target_lang: Language code of the output; its token id is passed to
            ``model.generate`` as ``forced_bos_token_id``.
        max_length: Forwarded to ``model.generate`` as ``max_length``.

    Returns:
        The decoded translation. Blank input yields ``""``; a missing source
        or target language yields an ``"Error: ..."`` message instead of a
        translation.
    """
    # Nothing to translate: skip the model call entirely.
    if not text or not text.strip():
        return ""

    # An unselected dropdown arrives as None. Without this guard a missing
    # target would become forced_bos_token_id=None and the output language
    # would no longer be constrained.
    if not source_lang:
        return "Error: the source language is incorrect"
    if not target_lang:
        return "Error: the target language is incorrect"

    tokenizer.src_lang = source_lang
    inputs = tokenizer(text, return_tensors="pt").to(model.device)
    translated_tokens = model.generate(
        **inputs,
        forced_bos_token_id=tokenizer.convert_tokens_to_ids(target_lang),
        # Previously accepted by this function but never handed to generate().
        max_length=max_length,
    )
    # A single input string produces a batch of one; return its only entry.
    return tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
# Input widgets, listed in the positional order of translate()'s parameters:
# text, source_lang, target_lang. No widget is supplied for max_length, so
# translate() falls back to its default for that argument.
_input_widgets = [
    gr.components.Textbox(label="Text"),
    gr.components.Dropdown(choices=LANGS, label="Source Language"),
    gr.components.Dropdown(choices=LANGS, label="Target Language"),
]

# Single text box that displays whatever translate() returns.
_output_widget = gr.components.Textbox(label="Translated text")

gradio_ui = gr.Interface(
    title="NLLB Tamazight Translation Demo",
    fn=translate,
    inputs=_input_widgets,
    outputs=_output_widget,
)

# Start serving the interface.
gradio_ui.launch()