`app.py` — Phi-2 loaded in 4-bit and served through a Gradio `ChatInterface`:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import gradio as gr
import torch

# Quantization config (4-bit for max speed)
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,                     # 4-bit precision
    bnb_4bit_quant_type="nf4",             # NF4 for better accuracy
    bnb_4bit_compute_dtype=torch.float16,  # use float16 for computation
)

# Load Phi-2 (a small model with high-quality responses); quantization_config
# applies the 4-bit settings, and device_map places the layers automatically.
model_name = "microsoft/phi-2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quant_config,
    device_map="auto",
)

# Speed up inference with torch.compile (optional: with bitsandbytes 4-bit
# weights it may graph-break and fall back to eager execution)
model = torch.compile(model)

def respond(message, history):
    inputs = tokenizer(message, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=50,
        do_sample=True,                       # required for temperature/top_p to apply
        temperature=0.7,
        top_p=0.9,
        pad_token_id=tokenizer.eos_token_id,  # Phi-2 has no dedicated pad token
    )
    # Decode only the newly generated tokens, not the echoed prompt
    return tokenizer.decode(outputs[0][inputs.input_ids.shape[-1]:], skip_special_tokens=True)

# Gradio chat interface
gr.ChatInterface(
    respond,
    title="🤖 Phi-2 Chatbot",
    description="Ask me anything! Powered by Phi-2.",
    examples=["What's your favorite book?", "Tell me a fun fact about space!"],
    theme="soft",
).launch()
```
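
The `respond` function can be smoke-tested directly, without the UI (a minimal sketch; the prompt is arbitrary and `history` is ignored by the function above):

```python
# Direct call to respond(), bypassing the Gradio interface.
# The empty list stands in for the chat history, which respond() does not use.
print(respond("Tell me a fun fact about space!", history=[]))
```

On Hugging Face Spaces, the same file also needs `transformers`, `torch`, `gradio`, `accelerate`, and `bitsandbytes` listed in `requirements.txt`; 4-bit loading via bitsandbytes assumes a CUDA GPU.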