"""Gradio chat Space serving DeepSeek-V3 through transformers."""
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "deepseek-ai/DeepSeek-V3"

tokenizer = AutoTokenizer.from_pretrained(model_id)
# trust_remote_code=True is required: DeepSeek-V3 ships custom modeling code.
# device_map="auto" places the weights on GPU(s) when available, so the
# inputs and the model end up on the same device (the original moved only
# the inputs to CUDA and left the model on CPU).
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    torch_dtype="auto",
    device_map="auto",
)


def predict(message, history):
    """Generate a single-turn reply to *message*.

    Parameters
    ----------
    message : str
        The latest user message from the chat box.
    history : list
        Prior (user, assistant) turns supplied by Gradio. Currently unused:
        each reply is generated from *message* alone.

    Returns
    -------
    str
        The assistant's reply text, without the echoed prompt.
    """
    prompt = tokenizer.apply_chat_template(
        [{"role": "user", "content": message}],
        tokenize=False,
        add_generation_prompt=True,
    )
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=50)  # Adjust max_new_tokens as needed
    # Decode only the newly generated tokens — decoding outputs[0] whole
    # would prepend the full prompt text to every reply.
    new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)


# ChatInterface takes only the callback and builds its own chatbot/textbox
# components. The original passed invalid `inputs=`/`outputs=` kwargs and a
# nonexistent `gr.Chatbox` component, which crashes at startup.
iface = gr.ChatInterface(fn=predict)

if __name__ == "__main__":
    iface.launch()