# Source: Hugging Face Space (status shown at capture time: Sleeping)
import gradio as gr | |
import torch | |
from kuidastaltsutadalaamat.trainllm import load_model, load_tokenizer | |
from kuidastaltsutadalaamat.inference import llm_generate | |
from kuidastaltsutadalaamat.data import LazyTokenizingInferenceDataset | |
from kuidastaltsutadalaamat.promptops import * | |
# Single source of truth for the checkpoint id shared by the model and tokenizer.
_CHECKPOINT = "mphi/smugri4-1808-ep3-tmptest"

# No accelerator object is used; the loaders receive None.
accel = None

# Use the GPU when torch reports one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# "eager" attention is requested so the model also works on CPU.
model = load_model(_CHECKPOINT, device, accelerator=accel, attention="eager")
model.eval()

tokenizer = load_tokenizer(_CHECKPOINT, accelerator=accel)
# Labels offered in both the "From" and "To" dropdowns of the UI below.
# Each entry is a language name, optionally followed by comma-separated
# qualifiers (dialect / variety / orthography / text-type such as "bible",
# "dictionary", "speech").
# NOTE(review): presumably these must match the language tags the model was
# trained with, since they are passed verbatim as src_lang / tgt_lang — confirm.
languages = ["English",
    "English, bible",
    "English, dictionary",
    "English, speech",
    "Erzya",
    "Erzya, bible",
    "Erzya, dictionary",
    "Estonian",
    "Estonian, Amb, dictionary",
    "Estonian, Ann, dictionary",
    "Estonian, Aud, dictionary",
    "Estonian, Hag, dictionary",
    "Estonian, Han, dictionary",
    "Estonian, Hii, emm, dictionary",
    "Estonian, Hii, käi, dictionary",
    "Estonian, Hii, phl, dictionary",
    "Estonian, Hii, rei, dictionary",
    "Estonian, Hiiu, dictionary",
    "Estonian, Hjn, dictionary",
    "Estonian, Hljr, dictionary",
    "Estonian, Hmd, dictionary",
    "Estonian, Hää, dictionary",
    "Estonian, Iisi, dictionary",
    "Estonian, Iisk, dictionary",
    "Estonian, Iisr, dictionary",
    "Estonian, Jjn, dictionary",
    "Estonian, Jmd, dictionary",
    "Estonian, Juu, dictionary",
    "Estonian, Jõer, dictionary",
    "Estonian, Jõh, dictionary",
    "Estonian, Jür, dictionary",
    "Estonian, Kad, dictionary",
    "Estonian, Kihnu, dictionary",
    "Estonian, Kjn, dictionary",
    "Estonian, Kodi, dictionary",
    "Estonian, Kos, dictionary",
    "Estonian, Kse, dictionary",
    "Estonian, Kuuk, dictionary",
    "Estonian, Kuur, dictionary",
    "Estonian, Lng, dictionary",
    "Estonian, Lüg, dictionary",
    "Estonian, Mar, dictionary",
    "Estonian, Mih, dictionary",
    "Estonian, Muhu, dictionary",
    "Estonian, Mul, hel, dictionary",
    "Estonian, Mul, hls, dictionary",
    "Estonian, Mul, krk, dictionary",
    "Estonian, Mul, trv, dictionary",
    "Estonian, Mulgi",
    "Estonian, Mulgi, dictionary",
    "Estonian, Mär, dictionary",
    "Estonian, Pjg, dictionary",
    "Estonian, Plt, dictionary",
    "Estonian, Ranna, dictionary",
    "Estonian, Rap, dictionary",
    "Estonian, Rid, dictionary",
    "Estonian, Saa, dictionary",
    "Estonian, Saa, jäm, dictionary",
    "Estonian, Saa, kaa, dictionary",
    "Estonian, Saa, khk, dictionary",
    "Estonian, Saa, kär, dictionary",
    "Estonian, Saa, mus, dictionary",
    "Estonian, Saa, pha, dictionary",
    "Estonian, Saa, pöi, dictionary",
    "Estonian, Seto",
    "Estonian, Seto, dictionary",
    "Estonian, Trm, dictionary",
    "Estonian, Trt, nõo, dictionary",
    "Estonian, Trt, ote, dictionary",
    "Estonian, Trt, puh, dictionary",
    "Estonian, Trt, ran, dictionary",
    "Estonian, Tõs, dictionary",
    "Estonian, Tür, dictionary",
    "Estonian, Vair, dictionary",
    "Estonian, Var, dictionary",
    "Estonian, Vig, dictionary",
    "Estonian, Vjg, dictionary",
    "Estonian, Vng, dictionary",
    "Estonian, Vän, dictionary",
    "Estonian, bible",
    "Estonian, dictionary",
    "Estonian, Äks, dictionary",
    "Finnish",
    "Finnish, bible",
    "Finnish, dictionary",
    "French",
    "German",
    "Hill Mari",
    "Hill Mari, bible",
    "Hungarian",
    "Hungarian, bible",
    "Inari Sami",
    "Inari Sami, dictionary",
    "Izhorian",
    "Izhorian, Alamaluuga, speech",
    "Izhorian, Mehmet",
    "Izhorian, Soikkola",
    "Izhorian, Soikkola, speech",
    "Izhorian, speech",
    "Kazym Khanty",
    "Kazym Khanty, 2000",
    "Kazym Khanty, 2013",
    "Kildin Sami",
    "Kildin Sami, Orth1",
    "Kildin Sami, Orth1, dictionary",
    "Kildin Sami, Orth2",
    "Kildin Sami, Orth2, dictionary",
    "Komi-Permyak",
    "Komi-Permyak, bible",
    "Komi-Zyrian",
    "Komi-Zyrian, bible",
    "Komi-Zyrian, dictionary",
    "Kven",
    "Kven, dictionary",
    "Latvian",
    "Latvian, bible",
    "Latvian, dictionary",
    "Livonian, Idaliivi, ft",
    "Livonian, Ira, ft",
    "Livonian, Lääneliivi, ft",
    "Livonian, Standard",
    "Livonian, Standard, dictionary",
    "Livvi",
    "Livvi, Impilahti",
    "Livvi, Kondushi",
    "Livvi, Kotkozero",
    "Livvi, Nekkula",
    "Livvi, Newwritten",
    "Livvi, Oldwritten",
    "Livvi, Rypushkalitsa",
    "Livvi, Salmi",
    "Livvi, Syamozero",
    "Livvi, Tulmozero",
    "Livvi, Vedlozero",
    "Livvi, Vidlitsa",
    "Livvi, bible",
    "Ludian",
    "Ludian, Central",
    "Ludian, Miikul",
    "Ludian, Miikul, dictionary",
    "Ludian, Mikhailovskoye",
    "Ludian, Newwritten",
    "Ludian, Northern",
    "Ludian, Southern",
    "Lule Sami",
    "Lule Sami, dictionary",
    "Mansi, Obs",
    "Mansi, Sosv",
    "Mansi, Unk",
    "Mansi, Unk, bible",
    "Mansi, Verh",
    "Meadow Mari",
    "Meadow Mari, bible",
    "Meadow Mari, dictionary",
    "Meänkieli",
    "Moksha",
    "Moksha, bible",
    "Northern Sami",
    "Northern Sami, dictionary",
    "Norwegian",
    "Norwegian, bible",
    "Norwegian, dictionary",
    "Pite Sami",
    "Priur Khanty",
    "Proper Karelian",
    "Proper Karelian, Dyorzha",
    "Proper Karelian, Ilomantsi",
    "Proper Karelian, Keret",
    "Proper Karelian, Kestenga",
    "Proper Karelian, Kontokki",
    "Proper Karelian, Korbiselga",
    "Proper Karelian, Myandyselga",
    "Proper Karelian, Newwritten",
    "Proper Karelian, Newwrittentver",
    "Proper Karelian, Oldwritten",
    "Proper Karelian, Oldwrittentver",
    "Proper Karelian, Oulanga",
    "Proper Karelian, Padany",
    "Proper Karelian, Panozero",
    "Proper Karelian, Poduzhemye",
    "Proper Karelian, Porosozero",
    "Proper Karelian, Reboly",
    "Proper Karelian, Rugozero",
    "Proper Karelian, Suistamo",
    "Proper Karelian, Suoyarvi",
    "Proper Karelian, Suvi",
    "Proper Karelian, Tikhtozero",
    "Proper Karelian, Tikhvin",
    "Proper Karelian, Tolmachi",
    "Proper Karelian, Tunguda",
    "Proper Karelian, Uhta",
    "Proper Karelian, Valdai",
    "Proper Karelian, Vesyegonsk",
    "Proper Karelian, Viena",
    "Proper Karelian, Voknavolok",
    "Proper Karelian, Vychetaibola",
    "Proper Karelian, Yushkozero",
    "Proper Karelian, bible",
    "Russian",
    "Russian, bible",
    "Russian, dictionary",
    "Russian, speech",
    "Shur Khanty",
    "Shur Khanty, 2013",
    "Shur Khanty, bible",
    "Skolt Sami",
    "Skolt Sami, dictionary",
    "Southern Sami",
    "Southern Sami, dictionary",
    "Sred Khanty",
    "Surgut Khanty",
    "Surgut Khanty, 2000",
    "Surgut Khanty, 2013",
    "Swedish",
    "Udmurt",
    "Udmurt, bible",
    "Udmurt, dictionary",
    "Ume Sami",
    "Unk Khanty",
    "Vakh Khanty",
    "Vakh Khanty, 2013",
    "Veps",
    "Veps, Centraleastern",
    "Veps, Centralwestern",
    "Veps, Newwritten",
    "Veps, Northern",
    "Veps, Southern",
    "Veps, bible",
    "Votic, I",
    "Votic, Idavadja, ft",
    "Votic, J",
    "Votic, Ja",
    "Votic, K",
    "Votic, Ke",
    "Votic, Kõ",
    "Votic, L",
    "Votic, Li",
    "Votic, Lu",
    "Votic, Läänevadja, ft",
    "Votic, M",
    "Votic, P",
    "Votic, Po",
    "Votic, R",
    "Votic, Ra",
    "Votic, S",
    "Votic, Standard",
    "Votic, U",
    "Votic, Unk",
    "Votic, Unk, dictionary",
    "Votic, Unk, speech",
    "Votic, V",
    "Votic, dictionary",
    "Võro, Har, dictionary",
    "Võro, Kan, dictionary",
    "Võro, Krl, dictionary",
    "Võro, Lei, dictionary",
    "Võro, Lut, dictionary",
    "Võro, Plv, dictionary",
    "Võro, Räp, dictionary",
    "Võro, Rõu, dictionary",
    "Võro, Setom",
    "Võro, Sõnaq",
    "Võro, Sõnaq, dictionary",
    "Võro, Uma",
    "Võro, Urv, dictionary",
    "Võro, Vas, dictionary",
    "Võro, X"
]
def run_inference(text, from_lang, to_lang, mode):
    """Run the model once on ``text`` and return the first generated result.

    Args:
        text: Input segment, stored under the ``"src_segm"`` key of the request.
        from_lang: Source-language label; only used when ``mode == "translate"``.
        to_lang: Target-language label; only used when ``mode == "translate"``.
        mode: Task name stored under ``"task"``. ``"translate"`` selects the
            ``PF_SMUGRI_MT`` prompt format; any other value selects
            ``PF_SMUGRI_LID``.

    Returns:
        The first element of whatever ``llm_generate`` returns
        (presumably the generated string — confirm against ``llm_generate``).
    """
    is_translation = mode == "translate"

    request = {"src_segm": text, "task": mode}
    if is_translation:
        # Language labels are only part of the request for translation.
        request["src_lang"] = from_lang
        request["tgt_lang"] = to_lang

    template = PF_SMUGRI_MT if is_translation else PF_SMUGRI_LID

    # A one-item dataset is used purely to turn the request into model input.
    dataset = LazyTokenizingInferenceDataset([request], tokenizer, template)
    generated = llm_generate(model, tokenizer, dataset[0], debug=False, max_len=512)
    return generated[0]
# Gradio UI: a text box, an "Identify language" button that fills the "From"
# dropdown, From/To dropdowns, a "Translate" button and an output box.
with gr.Blocks() as demo:
    text_input = gr.Textbox(label="Text", lines=6, placeholder="Enter text...")
    identify_btn = gr.Button("Identify language", interactive=False)
    with gr.Row():
        from_dropdown = gr.Dropdown(choices=languages, label="From", value=None)
        to_dropdown = gr.Dropdown(choices=languages, label="To", value=None)
    translate_btn = gr.Button("Translate", interactive=False)
    output = gr.Textbox(label="Output", lines=6)

    def toggle_identify(text):
        """Enable the identify button only when ``text`` is non-blank."""
        # `text and ...` guards against a None value before calling .strip().
        return gr.update(interactive=bool(text and text.strip()))

    text_input.change(toggle_identify, [text_input], [identify_btn])

    def toggle_translate(text, f, t):
        """Enable the translate button only when text and both languages are set."""
        return gr.update(interactive=bool(text and text.strip() and f and t))

    # Re-evaluate the translate button whenever any of its three inputs changes.
    translate_inputs = [text_input, from_dropdown, to_dropdown]
    text_input.change(toggle_translate, translate_inputs, [translate_btn])
    from_dropdown.change(toggle_translate, translate_inputs, [translate_btn])
    to_dropdown.change(toggle_translate, translate_inputs, [translate_btn])

    identify_btn.click(
        fn=lambda text: run_inference(text, None, None, mode="identify"),
        inputs=[text_input],
        outputs=[from_dropdown],
    ).then(
        # The current values must arrive as event inputs: reading
        # `component.value` inside a callback only yields the value the
        # component was constructed with, not what the user has entered.
        toggle_translate,
        translate_inputs,
        [translate_btn],
    )

    translate_btn.click(
        fn=lambda text, f, t: run_inference(text, f, t, mode="translate"),
        inputs=[text_input, from_dropdown, to_dropdown],
        outputs=[output],
    )

if __name__ == "__main__":
    demo.launch()