# smugri4-preview / app.py
# mphi's picture
# Update app.py
# 2497f22 verified
import gradio as gr
import torch
from kuidastaltsutadalaamat.trainllm import load_model, load_tokenizer
from kuidastaltsutadalaamat.inference import llm_generate
from kuidastaltsutadalaamat.data import LazyTokenizingInferenceDataset
from kuidastaltsutadalaamat.promptops import *
# No accelerator object is used by this app; the loaders below receive None.
accel = None

# Identifier of the checkpoint handed to load_model / load_tokenizer.
model_id = "mphi/smugri4-1808-hh_cpt-ep5_sft-ep3"

# Prefer CUDA when torch reports it as available, otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# "eager" attention is requested unconditionally (original note: eager for cpu).
model = load_model(model_id, device, accelerator=accel, attention="eager")
# Presumably a torch module: put it in evaluation mode before serving requests.
model.eval()

tokenizer = load_tokenizer(model_id, accelerator=accel)
# Language / variety labels offered in the "To" dropdown of the UI.
languages = [
    "English",
    "Erzya",
    "Estonian",
    "Estonian, Hiiu, dictionary",
    "Estonian, Kihnu, dictionary",
    "Estonian, Muhu, dictionary",
    "Estonian, Mulgi",
    "Estonian, Ranna, dictionary",
    "Finnish",
    "Hill Mari",
    "Hungarian",
    "Inari Sami",
    "Izhorian, Alamaluuga, speech",
    "Izhorian, Mehmet",
    "Izhorian, Soikkola",
    "Kazym Khanty, 2013",
    "Kildin Sami, Orth1",
    "Kildin Sami, Orth2",
    "Komi-Permyak",
    "Komi-Zyrian",
    "Kven",
    "Latvian",
    "Livonian, Standard",
    "Livvi",
    "Ludian",
    "Ludian, Miikul",
    "Lule Sami",
    "Mansi, Obs",
    "Mansi, Sosv",
    "Mansi, Verh",
    "Meadow Mari",
    "Meänkieli",
    "Moksha",
    "Northern Sami",
    "Norwegian",
    "Pite Sami",
    "Priur Khanty",
    "Proper Karelian",
    "Russian",
    "Shur Khanty, 2013",
    "Skolt Sami",
    "Southern Sami",
    "Sred Khanty",
    "Surgut Khanty, 2013",
    "Swedish",
    "Udmurt",
    "Ume Sami",
    "Vakh Khanty, 2013",
    "Veps",
    "Votic, Standard",
    "Võro, Lei, dictionary",
    "Võro, Lut, dictionary",
    "Võro, Setom",
    "Võro, Sõnaq",
    "Võro, Uma",
]
# Full set of language / variety / domain labels offered in the "From" dropdown.
languages_full = [
    "English",
    "English, bible",
    "English, dictionary",
    "English, speech",
    "Erzya",
    "Erzya, bible",
    "Erzya, dictionary",
    "Estonian",
    "Estonian, Amb, dictionary",
    "Estonian, Ann, dictionary",
    "Estonian, Aud, dictionary",
    "Estonian, Hag, dictionary",
    "Estonian, Han, dictionary",
    "Estonian, Hii, emm, dictionary",
    "Estonian, Hii, käi, dictionary",
    "Estonian, Hii, phl, dictionary",
    "Estonian, Hii, rei, dictionary",
    "Estonian, Hiiu, dictionary",
    "Estonian, Hjn, dictionary",
    "Estonian, Hljr, dictionary",
    "Estonian, Hmd, dictionary",
    "Estonian, Hää, dictionary",
    "Estonian, Iisi, dictionary",
    "Estonian, Iisk, dictionary",
    "Estonian, Iisr, dictionary",
    "Estonian, Jjn, dictionary",
    "Estonian, Jmd, dictionary",
    "Estonian, Juu, dictionary",
    "Estonian, Jõer, dictionary",
    "Estonian, Jõh, dictionary",
    "Estonian, Jür, dictionary",
    "Estonian, Kad, dictionary",
    "Estonian, Kihnu, dictionary",
    "Estonian, Kjn, dictionary",
    "Estonian, Kodi, dictionary",
    "Estonian, Kos, dictionary",
    "Estonian, Kse, dictionary",
    "Estonian, Kuuk, dictionary",
    "Estonian, Kuur, dictionary",
    "Estonian, Lng, dictionary",
    "Estonian, Lüg, dictionary",
    "Estonian, Mar, dictionary",
    "Estonian, Mih, dictionary",
    "Estonian, Muhu, dictionary",
    "Estonian, Mul, hel, dictionary",
    "Estonian, Mul, hls, dictionary",
    "Estonian, Mul, krk, dictionary",
    "Estonian, Mul, trv, dictionary",
    "Estonian, Mulgi",
    "Estonian, Mulgi, dictionary",
    "Estonian, Mär, dictionary",
    "Estonian, Pjg, dictionary",
    "Estonian, Plt, dictionary",
    "Estonian, Ranna, dictionary",
    "Estonian, Rap, dictionary",
    "Estonian, Rid, dictionary",
    "Estonian, Saa, dictionary",
    "Estonian, Saa, jäm, dictionary",
    "Estonian, Saa, kaa, dictionary",
    "Estonian, Saa, khk, dictionary",
    "Estonian, Saa, kär, dictionary",
    "Estonian, Saa, mus, dictionary",
    "Estonian, Saa, pha, dictionary",
    "Estonian, Saa, pöi, dictionary",
    "Estonian, Seto",
    "Estonian, Seto, dictionary",
    "Estonian, Trm, dictionary",
    "Estonian, Trt, nõo, dictionary",
    "Estonian, Trt, ote, dictionary",
    "Estonian, Trt, puh, dictionary",
    "Estonian, Trt, ran, dictionary",
    "Estonian, Tõs, dictionary",
    "Estonian, Tür, dictionary",
    "Estonian, Vair, dictionary",
    "Estonian, Var, dictionary",
    "Estonian, Vig, dictionary",
    "Estonian, Vjg, dictionary",
    "Estonian, Vng, dictionary",
    "Estonian, Vän, dictionary",
    "Estonian, bible",
    "Estonian, dictionary",
    "Estonian, Äks, dictionary",
    "Finnish",
    "Finnish, bible",
    "Finnish, dictionary",
    "French",
    "German",
    "Hill Mari",
    "Hill Mari, bible",
    "Hungarian",
    "Hungarian, bible",
    "Inari Sami",
    "Inari Sami, dictionary",
    "Izhorian",
    "Izhorian, Alamaluuga, speech",
    "Izhorian, Mehmet",
    "Izhorian, Soikkola",
    "Izhorian, Soikkola, speech",
    "Izhorian, speech",
    "Kazym Khanty",
    "Kazym Khanty, 2000",
    "Kazym Khanty, 2013",
    "Kildin Sami",
    "Kildin Sami, Orth1",
    "Kildin Sami, Orth1, dictionary",
    "Kildin Sami, Orth2",
    "Kildin Sami, Orth2, dictionary",
    "Komi-Permyak",
    "Komi-Permyak, bible",
    "Komi-Zyrian",
    "Komi-Zyrian, bible",
    "Komi-Zyrian, dictionary",
    "Kven",
    "Kven, dictionary",
    "Latvian",
    "Latvian, bible",
    "Latvian, dictionary",
    "Livonian, Idaliivi, ft",
    "Livonian, Ira, ft",
    "Livonian, Lääneliivi, ft",
    "Livonian, Standard",
    "Livonian, Standard, dictionary",
    "Livvi",
    "Livvi, Impilahti",
    "Livvi, Kondushi",
    "Livvi, Kotkozero",
    "Livvi, Nekkula",
    "Livvi, Newwritten",
    "Livvi, Oldwritten",
    "Livvi, Rypushkalitsa",
    "Livvi, Salmi",
    "Livvi, Syamozero",
    "Livvi, Tulmozero",
    "Livvi, Vedlozero",
    "Livvi, Vidlitsa",
    "Livvi, bible",
    "Ludian",
    "Ludian, Central",
    "Ludian, Miikul",
    "Ludian, Miikul, dictionary",
    "Ludian, Mikhailovskoye",
    "Ludian, Newwritten",
    "Ludian, Northern",
    "Ludian, Southern",
    "Lule Sami",
    "Lule Sami, dictionary",
    "Mansi, Obs",
    "Mansi, Sosv",
    "Mansi, Unk",
    "Mansi, Unk, bible",
    "Mansi, Verh",
    "Meadow Mari",
    "Meadow Mari, bible",
    "Meadow Mari, dictionary",
    "Meänkieli",
    "Moksha",
    "Moksha, bible",
    "Northern Sami",
    "Northern Sami, dictionary",
    "Norwegian",
    "Norwegian, bible",
    "Norwegian, dictionary",
    "Pite Sami",
    "Priur Khanty",
    "Proper Karelian",
    "Proper Karelian, Dyorzha",
    "Proper Karelian, Ilomantsi",
    "Proper Karelian, Keret",
    "Proper Karelian, Kestenga",
    "Proper Karelian, Kontokki",
    "Proper Karelian, Korbiselga",
    "Proper Karelian, Myandyselga",
    "Proper Karelian, Newwritten",
    "Proper Karelian, Newwrittentver",
    "Proper Karelian, Oldwritten",
    "Proper Karelian, Oldwrittentver",
    "Proper Karelian, Oulanga",
    "Proper Karelian, Padany",
    "Proper Karelian, Panozero",
    "Proper Karelian, Poduzhemye",
    "Proper Karelian, Porosozero",
    "Proper Karelian, Reboly",
    "Proper Karelian, Rugozero",
    "Proper Karelian, Suistamo",
    "Proper Karelian, Suoyarvi",
    "Proper Karelian, Suvi",
    "Proper Karelian, Tikhtozero",
    "Proper Karelian, Tikhvin",
    "Proper Karelian, Tolmachi",
    "Proper Karelian, Tunguda",
    "Proper Karelian, Uhta",
    "Proper Karelian, Valdai",
    "Proper Karelian, Vesyegonsk",
    "Proper Karelian, Viena",
    "Proper Karelian, Voknavolok",
    "Proper Karelian, Vychetaibola",
    "Proper Karelian, Yushkozero",
    "Proper Karelian, bible",
    "Russian",
    "Russian, bible",
    "Russian, dictionary",
    "Russian, speech",
    "Shur Khanty",
    "Shur Khanty, 2013",
    "Shur Khanty, bible",
    "Skolt Sami",
    "Skolt Sami, dictionary",
    "Southern Sami",
    "Southern Sami, dictionary",
    "Sred Khanty",
    "Surgut Khanty",
    "Surgut Khanty, 2000",
    "Surgut Khanty, 2013",
    "Swedish",
    "Udmurt",
    "Udmurt, bible",
    "Udmurt, dictionary",
    "Ume Sami",
    "Unk Khanty",
    "Vakh Khanty",
    "Vakh Khanty, 2013",
    "Veps",
    "Veps, Centraleastern",
    "Veps, Centralwestern",
    "Veps, Newwritten",
    "Veps, Northern",
    "Veps, Southern",
    "Veps, bible",
    "Votic, I",
    "Votic, Idavadja, ft",
    "Votic, J",
    "Votic, Ja",
    "Votic, K",
    "Votic, Ke",
    "Votic, Kõ",
    "Votic, L",
    "Votic, Li",
    "Votic, Lu",
    "Votic, Läänevadja, ft",
    "Votic, M",
    "Votic, P",
    "Votic, Po",
    "Votic, R",
    "Votic, Ra",
    "Votic, S",
    "Votic, Standard",
    "Votic, U",
    "Votic, Unk",
    "Votic, Unk, dictionary",
    "Votic, Unk, speech",
    "Votic, V",
    "Votic, dictionary",
    "Võro, Har, dictionary",
    "Võro, Kan, dictionary",
    "Võro, Krl, dictionary",
    "Võro, Lei, dictionary",
    "Võro, Lut, dictionary",
    "Võro, Plv, dictionary",
    "Võro, Räp, dictionary",
    "Võro, Rõu, dictionary",
    "Võro, Setom",
    "Võro, Sõnaq",
    "Võro, Sõnaq, dictionary",
    "Võro, Uma",
    "Võro, Urv, dictionary",
    "Võro, Vas, dictionary",
    "Võro, X",
]
def run_inference(text, from_lang, to_lang, mode):
    """Run the loaded model on one text segment and return its first output.

    Args:
        text: Source text, stored under the "src_segm" key of the request.
        from_lang: Source language label; only used when mode is "translate".
        to_lang: Target language label; only used when mode is "translate".
        mode: Task name stored under the "task" key. "translate" selects the
            PF_SMUGRI_MT prompt format and adds the language pair to the
            request; any other value selects PF_SMUGRI_LID.

    Returns:
        The first element of whatever llm_generate returns for the request.
    """
    is_translation = mode == "translate"

    request = {"src_segm": text, "task": mode}
    if is_translation:
        request["src_lang"] = from_lang
        request["tgt_lang"] = to_lang

    template = PF_SMUGRI_MT if is_translation else PF_SMUGRI_LID

    # Single-item dataset: item 0 is the tokenized prompt for this request.
    dataset = LazyTokenizingInferenceDataset([request], tokenizer, template)
    generated = llm_generate(model, tokenizer, dataset[0], debug=False, max_len=512)
    return generated[0]
# Gradio UI: a text box, an "Identify language" button that fills the "From"
# dropdown, and a "Translate" button that writes to the output box.
with gr.Blocks() as demo:
    text_input = gr.Textbox(label="Text", lines=6, placeholder="Enter text...")
    identify_btn = gr.Button("Identify language", interactive=False)

    with gr.Row():
        from_dropdown = gr.Dropdown(choices=languages_full, label="From", value=None)
        to_dropdown = gr.Dropdown(choices=languages, label="To", value=None)

    translate_btn = gr.Button("Translate", interactive=False)
    output = gr.Textbox(label="Output", lines=6)

    def toggle_identify(text):
        """Enable the identify button only when the text is non-blank."""
        return gr.update(interactive=bool(text and text.strip()))

    def toggle_translate(text, f, t):
        """Enable the translate button only when text and both languages are set."""
        return gr.update(interactive=bool(text and text.strip() and f and t))

    # Everything the translate button's enabled state depends on.
    _translate_inputs = [text_input, from_dropdown, to_dropdown]

    text_input.change(toggle_identify, [text_input], [identify_btn])
    for _component in _translate_inputs:
        _component.change(toggle_translate, _translate_inputs, [translate_btn])

    # NOTE(review): the identified label is written straight into the "From"
    # dropdown; confirm the model only emits labels present in languages_full.
    identify_btn.click(
        fn=lambda text: run_inference(text, None, None, mode="identify"),
        inputs=[text_input],
        outputs=[from_dropdown],
    ).then(
        # Re-evaluate the translate button from the live values. Reading
        # `component.value` here would only see the values the components were
        # constructed with (None for the dropdowns), never what the user
        # entered, so the current values are passed in as real event inputs.
        toggle_translate,
        _translate_inputs,
        [translate_btn],
    )

    translate_btn.click(
        fn=lambda text, f, t: run_inference(text, f, t, mode="translate"),
        inputs=[text_input, from_dropdown, to_dropdown],
        outputs=[output],
    )

# Start the web server only when executed as a script.
if __name__ == "__main__":
    demo.launch()