File size: 2,314 Bytes
0314abc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#!/usr/bin/env python

import os
import time
from collections.abc import Iterator

import anthropic
import gradio as gr
from gradio import ChatMessage

# Shared Anthropic API client; the key is looked up in the environment once at import time.
client = anthropic.Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY"))


def fn(
    message: str, history: list[dict], max_tokens: int, thinking_budget: int
) -> Iterator[ChatMessage | list[ChatMessage]]:
    """Stream a Claude reply, with its extended-thinking trace, to the chat UI.

    Args:
        message: The new user message.
        history: Earlier chat messages as dicts with at least ``"role"`` and
            ``"content"`` keys, optionally ``"metadata"``.
        max_tokens: Forwarded as ``max_tokens`` to the Messages API.
        thinking_budget: Forwarded as the thinking ``budget_tokens``.

    Yields:
        While the model is thinking, the "Thinking" ``ChatMessage`` on its own;
        once answer text starts arriving, ``[thought, response]`` (or just the
        response when the stream produced no thinking block).

    Raises:
        gr.Error: If ``thinking_budget`` is not smaller than ``max_tokens``.
    """
    # The API rejects requests whose thinking budget is not below max_tokens;
    # fail early with a readable message instead of an opaque HTTP 400.
    if thinking_budget >= max_tokens:
        raise gr.Error("Thinking Budget must be smaller than Max Tokens.")

    messages = []
    for past_message in history:
        # Thought bubbles produced below are the messages that carry a metadata
        # title; they are UI-only and must not be sent back to the model.
        # NOTE(review): depending on the Gradio version, ordinary messages seem
        # to arrive with metadata missing, None, or a dict without a title --
        # checking the title covers all three; confirm against the pinned version.
        metadata = past_message.get("metadata") or {}
        if past_message["role"] == "assistant" and metadata.get("title"):
            continue
        messages.append({"role": past_message["role"], "content": past_message["content"]})
    messages.append({"role": "user", "content": message})

    # Pre-bind the streaming state so that a reply without a (visible) thinking
    # block cannot hit an unbound local further down.
    thought: ChatMessage | None = None
    response: ChatMessage | None = None
    start_time = 0.0

    with client.messages.stream(
        model="claude-3-7-sonnet-20250219",
        max_tokens=max_tokens,
        thinking={"type": "enabled", "budget_tokens": thinking_budget},
        messages=messages,
    ) as stream:
        for event in stream:
            if event.type == "content_block_start":
                if event.content_block.type == "thinking":
                    start_time = time.perf_counter()
                    thought = ChatMessage(content="", metadata={"title": "Thinking", "status": "pending"})
                    yield thought
            elif event.type == "content_block_delta":
                if event.delta.type == "thinking_delta":
                    if thought is not None:
                        thought.content += event.delta.thinking
                        yield thought
                elif event.delta.type == "text_delta":
                    # Created lazily so that only real text opens the answer
                    # bubble (other block kinds no longer reset it).
                    if response is None:
                        response = ChatMessage(content="")
                    response.content += event.delta.text
                    yield [thought, response] if thought is not None else response
            elif (
                event.type == "content_block_stop"
                and event.content_block.type == "thinking"
                and thought is not None
            ):
                # Close the bubble and record how long the thinking phase took.
                thought.metadata["status"] = "done"
                thought.metadata["duration"] = time.perf_counter() - start_time
                yield thought


# Chat UI: both token limits are exposed as sliders sharing the same range and
# are passed to `fn` as extra inputs, in this order.
demo = gr.ChatInterface(
    fn=fn,
    type="messages",
    additional_inputs=[
        gr.Slider(label=slider_label, minimum=1024, maximum=128000, step=1, value=initial_value)
        for slider_label, initial_value in (("Max Tokens", 10000), ("Thinking Budget", 8000))
    ],
    chatbot=gr.Chatbot(type="messages", scale=1, show_copy_button=True),
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()