"""Gradio demo app: submit a whisky review and inspect language-ID output."""

import html
import types

import fasttext
import gradio as gr
import numpy as np
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file
from transformers import AutoTokenizer

# Project-specific modules
from lib.bert_regressor import BertMultiHeadRegressor
from lib.bert_regressor_utils import (
    load_model_and_tokenizer,
    predict_flavours,
    # predict_is_review,
    TARGET_COLUMNS,
    ICONS,
)


### Settings #####################################################################
##################################################################################

# Official mirror repo hosting the lid.176.* language-identification models.
lid_path = hf_hub_download(
    repo_id="julien-c/fasttext-language-id",
    filename="lid.176.ftz",
)
lid_model = fasttext.load_model(lid_path)


def _is_prob_label_pair(item):
    """Return True if *item* looks like a ``(prob, label)`` pair."""
    return (
        isinstance(item, (list, tuple))
        and len(item) == 2
        and isinstance(item[1], str)
    )


def _split_prediction(out):
    """Normalise a raw fastText prediction into ``(labels, probs)`` lists.

    Three result shapes are handled:

    1. a sequence of ``(prob, label)`` pairs,
    2. a ``(labels, probs)`` 2-tuple,
    3. labels only (a sequence of labels or one bare label); no probabilities
       are available in that case, so each one defaults to ``1.0``.
    """
    if not isinstance(out, (list, tuple)):
        # A single bare label.
        return [out], [1.0]

    if out and all(_is_prob_label_pair(item) for item in out):
        # Shape 1: keep the real probabilities instead of discarding them.
        probs, labels = zip(*out)
        return list(labels), list(probs)

    if isinstance(out, tuple) and len(out) == 2 and not isinstance(out[1], str):
        # Shape 2: already split into labels and probabilities.
        labels, probs = out
        return list(labels), list(probs)

    # Shape 3: labels only.
    return list(out), [1.0] * len(out)


def _predict_np2_compat(self, text, k=1, threshold=0.0, on_unicode_error="strict"):
    """Replacement for the model's ``predict`` that is safe under NumPy 2.

    Calls the underlying binding (``self.f.predict``) directly and builds the
    probability array with ``np.asarray`` rather than ``np.array(copy=False)``.

    Args:
        text: Text to classify. Embedded newlines are collapsed to spaces.
        k: Number of labels to request.
        threshold: Minimum probability passed through to the binding.
        on_unicode_error: Unicode error policy passed through to the binding.

    Returns:
        ``(labels, probs)`` where ``labels`` is a list of label strings and
        ``probs`` is a NumPy array of the matching probabilities.
    """
    if isinstance(text, str):
        # fastText classifies one line per call, so a multi-line review would
        # otherwise be judged on its first line only. The upstream Python
        # wrapper also terminates the line with "\n"; do the same here since
        # this replacement bypasses that wrapper.
        text = text.replace("\n", " ") + "\n"

    out = self.f.predict(text, k, threshold, on_unicode_error)
    labels, probs = _split_prediction(out)
    return labels, np.asarray(probs)


# Patch this model instance only.
lid_model.predict = types.MethodType(_predict_np2_compat, lid_model)


### Check if lang is english #####################################################
def is_eng(review: str):
    """Detect whether *review* is written in English.

    Returns:
        ``(is_english, probability)``: whether the top predicted label is
        ``__label__en`` and the probability reported for that top label.
        ``(False, 0.0)`` if the model returned no label at all.
    """
    lang_labels, lang_probs = lid_model.predict(review)
    print(lang_labels, lang_probs)

    if not lang_labels:
        # No label returned.
        return False, 0.0

    return lang_labels[0] == "__label__en", float(lang_probs[0])


### Do actual prediction #########################################################
def predict(review: str, mode: str):
    """Gradio callback: build the HTML and JSON outputs for a review.

    Args:
        review: Raw text from the review textbox (may be ``None`` or blank).
        mode: Display mode; ``"table"`` fills the HTML output, anything else
            fills the JSON output.

    Returns:
        ``(html_out, json_out)``. Both outputs are always returned; the one
        not selected by *mode* is left empty.
    """
    review = (review or "").strip()

    if not review:
        # Reject empty input before running language identification on it.
        return "Please enter a review.", {}

    review_is_eng, review_is_eng_prob = is_eng(review)

    if mode == "table":
        html_out = f"{html.escape(review)} | {mode}"
        json_out = {}  # left empty in table mode
        return html_out, json_out

    # "JSON" mode
    html_out = ""  # left empty in JSON mode
    json_out = {
        "review": review,
        "mode": mode,
        "is_en": {
            "is": review_is_eng,
            "prob": review_is_eng_prob,
        },
    }
    return html_out, json_out


### Create Form interface with Gradio Framework ##################################
iface = gr.Interface(
    fn=predict,
    inputs=[
        gr.Textbox(
            label="Whisky Review",
            lines=8,
            placeholder="Enter whisky review",
            value="Honey roasted figs, cloves and sticky toffee pudding with a suggestive smokiness evocative of barbecued bananas.",
        ),
        gr.Dropdown(
            label="Display mode",
            choices=[("Table", "table"), ("JSON", "json")],
            value="table",
        ),
    ],
    outputs=[
        gr.HTML(label="Table"),
        gr.JSON(label="JSON"),
    ],
    title="Submit Whisky Review for Classification",
    # description="Paste an English whisky review, choose display mode, then submit.",
)

iface.launch()