from html import unescape
from unicodedata import normalize
import gradio as gr
from transformers import pipeline
import re

# Matches any run of whitespace characters (spaces, tabs, newlines, ...).
re_multispace = re.compile(r"\s+")


def normalize_text(text):
    """Clean raw article text before it is handed to the classifiers.

    HTML entities are decoded and the text is NFKC-normalized *first*; only
    then is whitespace collapsed and trimmed. Doing it in this order ensures
    that whitespace introduced by decoding (e.g. ``&nbsp;`` or ``&#10;``)
    is also reduced to single spaces instead of leaking into the result.

    Args:
        text: The raw text, or ``None``.

    Returns:
        The cleaned text, with every run of whitespace replaced by a single
        space and no leading/trailing whitespace, or ``None`` when ``text``
        is ``None``.
    """
    if text is None:
        return None

    text = unescape(text)
    text = normalize("NFKC", text)
    # ``\s+`` already matches "\n", "\t" and "\r", so no separate replaces
    # are needed before collapsing.
    return re_multispace.sub(" ", text).strip()


# Names of the classification tasks exposed by the demo. Each name is also
# used (with spaces turned into underscores) to locate its checkpoint.
models = ["Server", "Category", "Gender", "Day Of Week"]


def _load_classifier(task_name):
    """Build the text-classification pipeline for a single task name."""
    checkpoint = f"hynky/{task_name.replace(' ', '_')}"
    return pipeline(
        task="text-classification",
        model=checkpoint,
        tokenizer="ufal/robeczech-base",
        truncation=True,
        max_length=512,
        top_k=5,
    )


# One ready-to-call pipeline per task, keyed by the task name from ``models``.
pipelines = {task_name: _load_classifier(task_name) for task_name in models}


def predict(article):
    """Classify an article with every configured model.

    The text is cleaned with ``normalize_text`` and then passed to each
    pipeline, following the order of ``models``.

    Args:
        article: The article text to classify.

    Returns:
        A tuple with one dict per model; each dict maps a predicted label to
        its score rounded to three decimal places.
    """
    cleaned = normalize_text(article)

    # Run every classifier; the first element of each pipeline output is
    # taken as the scored labels for the single input.
    raw_outputs = []
    for name in models:
        raw_outputs.append(pipelines[name](cleaned)[0])

    # Convert each list of {"label", "score"} entries into a label -> score map.
    label_scores = []
    for scored_labels in raw_outputs:
        mapping = {}
        for entry in scored_labels:
            mapping[entry["label"]] = round(entry["score"], 3)
        label_scores.append(mapping)

    return tuple(label_scores)

# Prefer the top-level component classes when the installed gradio provides
# them: the ``gr.inputs`` / ``gr.outputs`` namespaces are deprecated in
# gradio 3.x and removed in 4.x. Older releases fall back to the legacy names.
_Textbox = gr.Textbox if hasattr(gr, "Textbox") else gr.inputs.Textbox
_Label = gr.Label if hasattr(gr, "Label") else gr.outputs.Label

gr.Interface(
    predict,
    inputs=_Textbox(lines=4, placeholder="Paste a news article here..."),
    # One label widget per model, listed in ``models`` order so each widget
    # lines up with the corresponding element returned by ``predict``.
    outputs=[_Label(num_top_classes=5, label=model) for model in models],
    title="News Article Classifier",
).launch()