Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| from transformers import pipeline, AutoTokenizer, BitsAndBytesConfig, AutoModelForCausalLM | |
| import torch | |
| import spaces | |
# Hub repository id of the model served by this app.
MODEL_PATH = "benhaotang/phi4-qwq-sky-t1"
# Public model page, linked from the interface description.
MODEL_URL = "https://huggingface.co/" + MODEL_PATH
def load_model():
    """Load the model and tokenizer and wrap them in a text-generation pipeline.

    The weights are loaded in float16 with ``device_map="auto"``; anything
    that has to be offloaded to disk goes to ``offload_folder``.

    Returns:
        The ``text-generation`` pipeline built from the loaded model and
        tokenizer.
    """
    # NOTE(review): the previous BitsAndBytesConfig had load_in_8bit=False and
    # no load_in_4bit, i.e. it requested no quantization, while its only other
    # flag (llm_int8_enable_fp32_cpu_offload) applies to int8 loading. It has
    # been dropped so loading no longer goes through the bitsandbytes path.
    # Re-introduce a config with load_in_8bit=True (or load_in_4bit=True) if
    # quantized loading is actually wanted.
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_PATH,
        device_map="auto",
        torch_dtype=torch.float16,
        offload_folder="offload_folder",
    )
    tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)

    # The model object is already placed by the from_pretrained call above, so
    # device_map is not repeated here.
    return pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
    )
# Build the generation pipeline once at start-up; generate_response reuses it.
pipe = load_model()
# spaces.GPU marks the GPU entry point for ZeroGPU Spaces; it is documented as
# having no effect in other environments.
@spaces.GPU
def generate_response(prompt, max_length=1024):
    """Generate the assistant's reply to a single user prompt.

    The prompt is sent to the module-level ``pipe`` together with a fixed
    system message.

    Args:
        prompt: The user's question.
        max_length: Passed to the pipeline as ``max_new_tokens``.

    Returns:
        The content of the last message when it has the ``assistant`` role;
        the generated text itself when the pipeline returns a plain string;
        otherwise the ``str()`` of whatever the pipeline produced.
    """
    # Create messages with system prompt
    messages = [
        {"role": "system", "content": "You are a helpful AI assistant. You always think step by step."},
        {"role": "user", "content": prompt},
    ]
    outputs = pipe(messages, max_new_tokens=max_length)
    generated = outputs[0]["generated_text"]

    # Plain-text output needs no unpacking.
    if isinstance(generated, str):
        return generated

    # For chat-style input the pipeline is expected to return the conversation
    # as a list of message dicts. Index it directly: never eval() model output,
    # which would execute generated text as code.
    try:
        assistant_message = generated[-1]
        if assistant_message["role"] == "assistant":
            return assistant_message["content"]
    except (IndexError, KeyError, TypeError) as e:
        print(f"Error extracting response: {e}")

    # If extraction fails, return the raw output
    return str(generated)
# Example with proper line breaks.
# Raw string: the LaTeX backslashes (\mathcal, \phi, \lambda, \epsilon) are not
# valid Python escape sequences and must be kept literally.
example_prompt = r"""For a scalar field theory with interaction Lagrangian $\mathcal{L}_{int} = g\phi^3 + \lambda\phi^4$:
1. Enumerate all possible 1-loop Feynman diagrams contributing to the scalar propagator
2. For each diagram, write down its loop contribution
3. Provide Mathematica code to calculate these loop amplitudes with dimensional regularization at $d=4-\epsilon$
Please explain your reasoning step by step."""
# Input and output widgets for the single-question interface.
question_box = gr.Textbox(
    label="Enter your question",
    placeholder="Ask me anything...",
    lines=5,
)
answer_box = gr.Textbox(label="Response", lines=10)

# One-shot question/answer UI backed by generate_response, pre-populated with
# the formatted example prompt.
demo = gr.Interface(
    fn=generate_response,
    inputs=[question_box],
    outputs=answer_box,
    title="benhaotang/phi4-qwq-sky-t1",
    description=f""" To achieve CoT and science reasoning on small scale with a merge of CoT finetuned phi4 model.
Model: [benhaotang/phi4-qwq-sky-t1]({MODEL_URL})""",
    examples=[[example_prompt]],
)
# Start the Gradio server only when this file is executed as a script, so that
# importing the module does not launch it as a side effect.
if __name__ == "__main__":
    demo.launch()