"""Gradio demo: page through the ``minskiter/weibo`` test split and preview entity spans."""
import os
from itertools import islice
from typing import Any, Optional

import datasets
import gradio as gr
import pandas as pd
from huggingface_hub import login
from tqdm import tqdm

# The token is read from the ``HF_Token`` environment variable; when the
# variable is unset, ``None`` is what gets passed to ``login``.
login(os.environ.get("HF_Token"))

test = datasets.load_dataset(
    "minskiter/weibo",
    split=datasets.Split.TEST,
    streaming=True,
)
int2str = test.features["labels"].feature.int2str
page_size = 10


def _load_pages(dataset, size: int) -> list[pd.DataFrame]:
    """Read ``dataset`` once and return it as DataFrames of at most ``size`` rows."""
    loaded = []
    rows = iter(dataset)
    with tqdm(desc="load dataset") as bar:
        while True:
            # Consume the stream in a single pass. Building every page with
            # ``skip(i * size).take(size)`` restarts the stream for each page.
            batch = list(islice(rows, size))
            if not batch:
                break
            loaded.append(pd.DataFrame(batch))
            # ``update`` takes an increment, not an absolute position.
            bar.update(1)
    return loaded


pages = _load_pages(test, page_size)
# Fall back to an empty frame so an empty split does not crash at import time.
cur = pages[0] if pages else pd.DataFrame()


def show(page: Optional[float]) -> pd.DataFrame:
    """Select the page to display and return its DataFrame.

    Args:
        page: Zero-based page number as delivered by the ``gr.Number`` input.
            ``None`` leaves the current page unchanged; out-of-range values
            are clamped to the first/last page.

    Returns:
        The DataFrame that is now the current page.
    """
    # NOTE(review): ``cur`` is module-level state, so it is shared by every
    # caller of this process.
    global cur
    if page is None or not pages:
        return cur
    index = min(max(int(page), 0), len(pages) - 1)
    cur = pages[index]
    return cur


def getobj():
    """Return a fresh, empty entity record (no characters, no span, type "O")."""
    return {
        "word": [],
        "start": -1,
        "end": -1,
        "entity": "O",
    }


def _decode_entities(text, labels) -> list[dict[str, Any]]:
    """Turn per-character tag strings into a list of entity span records.

    Tags are dispatched on their first character: ``B`` opens a span, ``I``,
    ``M`` and ``E`` extend the open span, ``S`` is a one-character span and
    ``O`` closes whatever is open. The entity type is the part of the tag
    after the last ``-``.
    """
    entities = []
    obj = getobj()
    # A trailing "O" sentinel flushes a span that runs to the end of the text.
    for i, label in enumerate([*labels, "O"]):
        tag = label[0]
        if tag in ("B", "S", "O"):
            if obj["word"]:
                obj["word"] = "".join(obj["word"])
                entities.append(obj)
            obj = getobj()
        if tag == "S":
            obj["start"] = i
            obj["end"] = i + 1
            obj["word"] = text[i]
            obj["entity"] = label.split("-")[-1]
            entities.append(obj)
            obj = getobj()
        elif tag == "B" or (tag in ("I", "M", "E") and obj["start"] < 0):
            # A continuation tag with no open span is treated as a span start;
            # otherwise it would yield a record with start == -1 and type "O".
            obj["start"] = i
            obj["end"] = i + 1
            obj["word"].append(text[i])
            obj["entity"] = label.split("-")[-1]
        elif tag in ("I", "M", "E"):
            obj["word"].append(text[i])
            obj["end"] = i + 1
    return entities


def showIter(evt: gr.SelectData) -> dict[str, Any]:
    """Build the highlighted-text payload for the selected DataFrame row.

    Args:
        evt: Selection event; ``evt.index[0]`` is used as the row position
            within the current page.

    Returns:
        A dict with ``"text"`` (the row's text joined into one string) and
        ``"entities"`` (span records with ``word``/``start``/``end``/``entity``).
    """
    row_index = evt.index[0]
    # Column 0 is read as the text and column 1 as the integer label ids;
    # only the two needed cells are read rather than converting the frame.
    text = cur.iloc[row_index, 0]
    label_ids = cur.iloc[row_index, 1]
    labels = int2str(list(map(int, label_ids)))
    return {"text": "".join(text), "entities": _decode_entities(text, labels)}


# NOTE(review): the layout nesting below is reconstructed (table and page
# selector in a column, preview beside it in the same row) — confirm.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            output = gr.DataFrame(value=cur)
            page = gr.Number(minimum=0, maximum=max(len(pages) - 1, 0), label="page")
            page.change(show, page, outputs=output)
        text = gr.HighlightedText(label="preview")
    output.select(showIter, inputs=[], outputs=[text])

demo.launch()