import gradio as gr
import fasttext
import html
import numpy as np
import types
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file
from transformers import AutoTokenizer
# Project-specific modules
from lib.bert_regressor import BertMultiHeadRegressor
from lib.bert_regressor_utils import (
load_model_and_tokenizer,
predict_flavours,
#predict_is_review,
TARGET_COLUMNS,
ICONS
)
### Settings #####################################################################
##################################################################################
# Official mirror repository that hosts the lid.176.* fastText language-ID models.
_LID_SOURCE = {
    "repo_id": "julien-c/fasttext-language-id",
    "filename": "lid.176.ftz",
}
# Fetch the model file from the Hugging Face Hub; the result is handed to fastText as a path.
lid_path = hf_hub_download(**_LID_SOURCE)
# Module-level language-identification model shared by the code below.
lid_model = fasttext.load_model(lid_path)
# robustes predict mit NumPy-2-Fix + Fallback, falls fastText nur Labels liefert
def _predict_np2_compat(self, text, k=1, threshold=0.0, on_unicode_error='strict'):
out = self.f.predict(text, k, threshold, on_unicode_error)
# Fälle:
# 1) (labels, probs)
# 2) labels-only (einige Builds/SWIG-Versionen)
if isinstance(out, tuple) and len(out) == 2:
labels, probs = out
else:
labels = out
# sinnvolle Defaults, falls keine Wahrscheinlichkeiten vorliegen
if isinstance(labels, (list, tuple)):
probs = [1.0] * len(labels)
else:
labels = [labels]
probs = [1.0]
return labels, np.asarray(probs) # np.asarray statt np.array(copy=False)
# Patch this one instance: bind the compat function to `lid_model` so that
# `lid_model.predict(...)` calls `_predict_np2_compat` with `lid_model` as `self`.
lid_model.predict = types.MethodType(_predict_np2_compat, lid_model)
### Check if lang is English #####################################################
def is_eng(review: str):
    """Report whether *review* is classified as English by ``lid_model``.

    Args:
        review: Text to classify. Line breaks are folded into spaces before
            prediction so that a multi-line review is classified as a whole.

    Returns:
        ``(is_english, probability)``. ``(False, 0.0)`` when the model
        returns no label at all.

    The top entry returned by ``lid_model.predict`` is accepted in two shapes:
      * a plain label string, with its probability taken from the parallel
        probabilities sequence;
      * an indexable pair whose item ``[1]`` is the label (the shape the
        previous ``lang_label[1]`` comparison relied on) and whose item
        ``[0]`` is taken as the probability.
        NOTE(review): assumes such pairs are ``(probability, label)`` --
        confirm against the installed fastText build.
    """
    # The classifier works on a single line of text; avoid feeding it raw newlines.
    single_line = " ".join(review.splitlines())
    lang_labels, lang_probs = lid_model.predict(single_line)
    print(lang_labels, lang_probs)
    if not lang_labels:  # no label returned
        return False, 0.0

    top = lang_labels[0]
    if isinstance(top, str):
        # Indexing a string label with [1] would compare a single character
        # and never match, so compare the whole label instead.
        lang_label, lang_prob = top, lang_probs[0]
    else:
        # Pair shape: use the score carried in the pair, not the 1.0 filler
        # from the parallel probabilities sequence.
        lang_prob, lang_label = top[0], top[1]
    return lang_label == "__label__en", float(lang_prob)
### Do actual prediction #########################################################
def predict(review: str, mode: str):
    """Build the two Gradio outputs for a submitted review.

    Args:
        review: Raw text from the textbox; ``None`` or whitespace-only text
            counts as empty.
        mode: ``"table"`` fills the HTML output; any other value fills the
            JSON output.

    Returns:
        A ``(html_out, json_out)`` tuple. Exactly one of the two carries
        content; the other is left empty (``""`` or ``{}``). For empty input
        the HTML slot carries a prompt message and the JSON slot is ``{}``.
    """
    review = (review or "").strip()
    if not review:
        # Always return two outputs. Checked first so the language model is
        # never run on empty input.
        return "Please enter a review.", {}

    if mode == "table":
        # User text is escaped because it is rendered in an HTML component.
        return f"{html.escape(review)} | {mode}", {}

    # Any other mode is treated as "json". Language detection is only needed
    # here, so it is not run for the table view.
    review_is_eng, review_is_eng_prob = is_eng(review)
    json_out = {
        "review": review,
        "mode": mode,
        "is_en": {
            "is": review_is_eng,
            "prob": review_is_eng_prob,
        },
    }
    return "", json_out
### Create Form interface with Gradio Framework ##################################
# Input widgets, in the order `predict` receives its arguments: (review, mode).
_review_box = gr.Textbox(
    label="Whisky Review",
    lines=8,
    placeholder="Enter whisky review",
    value="Honey roasted figs, cloves and sticky toffee pudding with a suggestive smokiness evocative of barbecued bananas.",
)
_mode_picker = gr.Dropdown(
    label="Display mode",
    choices=[("Table", "table"), ("JSON", "json")],
    value="table",
)
# Output widgets, in the order `predict` returns its values: (html, json).
_result_views = [
    gr.HTML(label="Table"),
    gr.JSON(label="JSON"),
]

iface = gr.Interface(
    fn=predict,
    inputs=[_review_box, _mode_picker],
    outputs=_result_views,
    title="Submit Whisky Review for Classification",
    # description="Paste an English whisky review, choose display mode, then submit.",
)
# Start the web app as soon as this module is executed.
iface.launch()