"""Gradio demo that runs several text-classification models on a news article."""

import re
from html import unescape
from typing import Dict, Optional, Tuple
from unicodedata import normalize

import gradio as gr
from transformers import pipeline

# Matches any run of whitespace characters (spaces, tabs, newlines, ...).
re_multispace = re.compile(r"\s+")


def normalize_text(text: Optional[str]) -> Optional[str]:
    """Return a cleaned-up copy of *text*, or ``None`` if *text* is ``None``.

    Steps, in order: strip leading/trailing whitespace, collapse every
    whitespace run (including newlines, tabs and carriage returns) into a
    single space, decode HTML entities, and apply Unicode NFKC normalization.
    """
    if text is None:
        return None
    # ``\s+`` already covers "\n", "\t" and "\r", so no separate replaces
    # are needed before collapsing.
    text = re_multispace.sub(" ", text.strip())
    # NOTE(review): entity decoding and NFKC run *after* whitespace collapsing,
    # so input such as "&nbsp;" can reintroduce whitespace. The order is kept
    # as-is in case the models expect text normalized this way -- confirm.
    text = unescape(text)
    return normalize("NFKC", text)


# Classification tasks. Each name is also the Label title in the UI and maps
# to the model id "hynky/<name with spaces replaced by underscores>".
models = [
    "Server",
    "Category",
    "Gender",
    "Day Of Week",
]

# One text-classification pipeline per task. All share the same tokenizer,
# truncate inputs to 512 tokens and are configured with top_k=5.
pipelines = {
    model: pipeline(
        task="text-classification",
        model=f"hynky/{model.replace(' ', '_')}",
        tokenizer="ufal/robeczech-base",
        truncation=True,
        max_length=512,
        top_k=5,
    )
    for model in models
}


def predict(article: Optional[str]) -> Tuple[Optional[Dict[str, float]], ...]:
    """Classify *article* with every pipeline in ``pipelines``.

    Args:
        article: Raw article text; it is passed through ``normalize_text``
            before classification.

    Returns:
        A tuple with one entry per name in ``models`` (same order). Each
        entry is a dict mapping a predicted label to its score rounded to
        three decimals, or ``None`` when *article* is ``None`` or blank
        after normalization (nothing to classify).
    """
    article = normalize_text(article)
    if not article:
        # Avoid feeding None/empty text to the pipelines; None clears the
        # corresponding output component.
        return tuple(None for _ in models)

    results = []
    for model in models:
        # Index [0] selects the list of {"label", "score"} dicts produced
        # for the single input text.
        task_preds = pipelines[model](article)[0]
        results.append(
            {pred["label"]: round(pred["score"], 3) for pred in task_preds}
        )
    return tuple(results)


demo = gr.Interface(
    predict,
    inputs=gr.Textbox(lines=4, placeholder="Paste a news article here..."),
    # One Label component per model, in the same order as the tuple
    # returned by predict().
    outputs=[gr.Label(num_top_classes=5, label=model) for model in models],
    title="News Article Classifier",
)

if __name__ == "__main__":
    demo.launch()