"""Gradio demo that serves the NLLB Tamazight translation model."""

import gradio as gr
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

MODEL_NAME = "ayymen/NLLB-200-600M-Tamazight-All-Data"

# Language codes offered in both the source and the target dropdown.
LANGS = [
    "tzm_Tfng",
    "kab_Latn",
    "taq_Tfng",
    "taq_Latn",
    "eng_Latn",
    "cat_Latn",
    "spa_Latn",
    "fra_Latn",
    "ary_Arab",
]

# Load the model and tokenizer once at start-up; every request reuses them.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME).to(device)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)


def translate(
    text: str,
    source_lang: str,
    target_lang: str,
    max_length: int = 238,
) -> str:
    """Translate *text* from the source language to the target language.

    Args:
        text: The text to translate.
        source_lang: Language code of the input, assigned to
            ``tokenizer.src_lang`` before tokenizing.
        target_lang: Language code of the output; its token id is forced as
            the first generated token.
        max_length: Forwarded to ``model.generate`` as ``max_length`` to cap
            the length of the generated sequence.

    Returns:
        The decoded translation with special tokens stripped, or an empty
        string when *text* is empty or only whitespace.

    Raises:
        gr.Error: If either language is missing (e.g. a dropdown was left
            unselected).
    """
    # Nothing to translate: skip the model call entirely.
    if not text or not text.strip():
        return ""

    # An unselected dropdown arrives as None; fail with a readable message
    # instead of passing None into the tokenizer.
    if not source_lang or not target_lang:
        raise gr.Error("Please select both a source and a target language.")

    # Echo the request text to the server's stdout.
    print(text)

    # NOTE: this mutates the shared module-level tokenizer.
    tokenizer.src_lang = source_lang
    inputs = tokenizer(text, return_tensors="pt").to(model.device)

    translated_tokens = model.generate(
        **inputs,
        forced_bos_token_id=tokenizer.convert_tokens_to_ids(target_lang),
        max_length=max_length,
    )

    # A single input was tokenized, so the batch holds exactly one result.
    return tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]


# max_length is not exposed in the UI, so translate() always runs with its
# default value when called from the interface.
gradio_ui = gr.Interface(
    fn=translate,
    title="NLLB Tamazight Translation Demo",
    inputs=[
        gr.components.Textbox(label="Text"),
        gr.components.Dropdown(label="Source Language", choices=LANGS),
        gr.components.Dropdown(label="Target Language", choices=LANGS),
    ],
    outputs=gr.components.Textbox(label="Translated text"),
)

gradio_ui.launch()