#!/usr/bin/env python
"""Gradio chat demo that streams Claude's extended thinking and final answer."""

import os
import time
from collections.abc import Iterator

import anthropic
import gradio as gr
from gradio import ChatMessage

client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))


def fn(
    message: str, history: list[dict], max_tokens: int, thinking_budget: int
) -> Iterator[ChatMessage | list[ChatMessage]]:
    """Stream a reply to *message*, showing the model's thinking as it arrives.

    Args:
        message: The new user message.
        history: Earlier chat turns as dicts with ``"role"`` and ``"content"``
            keys and an optional ``"metadata"`` key.
        max_tokens: Value forwarded as ``max_tokens`` to the API call.
        thinking_budget: Value forwarded as the thinking ``budget_tokens``.

    Yields:
        The in-progress "Thinking" message while thinking is streamed, then
        ``[thought, response]`` as answer text arrives (or just the response
        when the stream contained no thinking block).

    Raises:
        gr.Error: If ``thinking_budget`` is not smaller than ``max_tokens``.
    """
    # Fail early with a readable message instead of surfacing an API error.
    if thinking_budget >= max_tokens:
        raise gr.Error("Thinking Budget must be smaller than Max Tokens.")

    # Assistant turns that carry metadata (such as the "Thinking" bubbles this
    # function yields) are display-only and are not sent back to the model.
    # ``.get`` tolerates history entries that have no "metadata" key at all.
    messages = [
        {"role": past_message["role"], "content": past_message["content"]}
        for past_message in history
        if not (past_message["role"] == "assistant" and past_message.get("metadata"))
    ]
    messages.append({"role": "user", "content": message})

    # Initialised up front so no branch below can hit an unbound local when
    # the stream does not start with a thinking block.
    thought: ChatMessage | None = None
    response: ChatMessage | None = None
    start_time = 0.0

    with client.messages.stream(
        model="claude-3-7-sonnet-20250219",
        max_tokens=max_tokens,
        thinking={"type": "enabled", "budget_tokens": thinking_budget},
        messages=messages,
    ) as stream:
        for event in stream:
            if event.type == "content_block_start":
                if event.content_block.type == "thinking":
                    start_time = time.perf_counter()
                    thought = ChatMessage(
                        content="",
                        metadata={"title": "Thinking", "status": "pending"},
                    )
                    yield thought
            elif event.type == "content_block_delta":
                if event.delta.type == "thinking_delta":
                    if thought is None:
                        start_time = time.perf_counter()
                        thought = ChatMessage(
                            content="",
                            metadata={"title": "Thinking", "status": "pending"},
                        )
                    thought.content += event.delta.thinking
                    yield thought
                elif event.delta.type == "text_delta":
                    # Created lazily and reused, so text from a later block is
                    # appended rather than replacing what was already shown.
                    if response is None:
                        response = ChatMessage(content="")
                    response.content += event.delta.text
                    yield response if thought is None else [thought, response]
            elif (
                event.type == "content_block_stop"
                and event.content_block.type == "thinking"
                and thought is not None
            ):
                thought.metadata["status"] = "done"
                thought.metadata["duration"] = time.perf_counter() - start_time
                yield thought


demo = gr.ChatInterface(
    fn=fn,
    type="messages",
    additional_inputs=[
        gr.Slider(label="Max Tokens", minimum=1024, maximum=128000, step=1, value=10000),
        gr.Slider(label="Thinking Budget", minimum=1024, maximum=128000, step=1, value=8000),
    ],
    chatbot=gr.Chatbot(type="messages", scale=1, show_copy_button=True),
)

if __name__ == "__main__":
    demo.launch()