File size: 1,320 Bytes
ac52ed4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Checkpoint identifier shared by the tokenizer and the model so the two
# always come from the same release.
model_name = "Qwen/Qwen2.5-0.5B"

# The tokenizer and the model are loaded independently of each other; both are
# module-level singletons reused by every request.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",  # device placement is delegated to the library
)

def highlight_probabilities(text: str) -> list:
    """Pair every token of *text* with the probability the model gave it.

    The text is tokenized with the module-level ``tokenizer`` and run through
    the module-level ``model`` once. For each token after the first, the
    probability is read from the softmax over the logits produced at the
    preceding position, i.e. the probability of that token given all the
    tokens before it.

    Args:
        text: The raw text to score.

    Returns:
        A list of ``(token, label)`` tuples, one per token, in input order.
        ``label`` is a ``float`` probability for every token except the
        first, whose label is ``None`` because no preceding position exists
        to predict it from. Empty input yields an empty list.
    """
    # The UI calls this on every change, including when the box is cleared.
    if not text:
        return []

    token_ids = tokenizer([text], return_tensors="pt").input_ids.to(model.device)
    if token_ids.shape[1] == 0:
        return []

    # "Ġ" is stripped exactly as before; presumably it is the tokenizer's
    # leading-space marker -- confirm against the tokenizer in use.
    tokens = [
        tok.replace("Ġ", "")
        for tok in tokenizer.convert_ids_to_tokens(token_ids[0].tolist())
    ]

    # A single token leaves nothing to predict; running the model on a
    # zero-length context would fail, so return the token unscored.
    if len(tokens) < 2:
        return [(tokens[0], None)]

    # Position i of `context` predicts position i of `targets` (token i + 1).
    context = token_ids[:, :-1]
    targets = token_ids[:, 1:].unsqueeze(-1)

    with torch.no_grad():
        logits = model(context).logits
        # Softmax in float32 so reduced-precision checkpoints still produce
        # usable probabilities.
        probs = torch.softmax(logits.float(), dim=-1)
        chosen = torch.gather(probs, dim=2, index=targets).squeeze(-1)[0]

    # tolist() yields plain Python floats regardless of device or dtype.
    scores = chosen.tolist()

    # Each probability labels the token it predicts (tokens[1:]), not the
    # token it was conditioned on.
    return [(tokens[0], None), *zip(tokens[1:], scores)]

# Assemble the page: a heading, a multi-line input box, and a highlighted-text
# view that is refreshed from the input box.
with gr.Blocks() as demo:
    gr.Markdown("## Token-by-Token Probability Highlighter")

    txt = gr.Textbox(
        lines=4,
        label="Input Text",
        placeholder="Type or paste any text here…",
    )
    highlighted = gr.HighlightedText(
        show_legend=True,
        combine_adjacent=True,
        label="Token Probabilities",
    )

    # Re-score the text each time the input box changes.
    txt.change(highlight_probabilities, inputs=txt, outputs=highlighted)

if __name__ == "__main__":
    # Start the app only when run as a script, not when imported.
    demo.launch()