# BudAi-Chat / app.py
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
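
# Assumed dependencies (not pinned in the original): transformers, gradio, torch,
# plus bitsandbytes for 4-bit loading and accelerate for device_map="auto".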
# Quantization config: 4-bit precision to cut memory use (and often speed up inference)
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,                     # load weights in 4-bit precision
    bnb_4bit_quant_type="nf4",             # NF4 quantization for better accuracy
    bnb_4bit_compute_dtype=torch.float16,  # use float16 for computation
)
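# bitsandbytes 4-bit quantization assumes a CUDA GPU is available; on a CPU-only
# machine, drop quant_config and load the model in full precision instead.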
# Load Phi-2 (a small model with high-quality responses) with the 4-bit config applied
model_name = "microsoft/phi-2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quant_config,  # apply the 4-bit config defined above
    device_map="auto",                 # place layers on GPU/CPU automatically
)
# Compile the model graph for faster inference
model = torch.compile(model)
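# Caveat: the first generate() call pays a one-time compilation cost, and the
# speedup may be limited when layers are quantized with bitsandbytes.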
def respond(message, history):
    inputs = tokenizer(message, return_tensors="pt").to(model.device)
    with torch.inference_mode():
        outputs = model.generate(**inputs, max_new_tokens=50, do_sample=True,
                                 temperature=0.7, top_p=0.9,
                                 pad_token_id=tokenizer.eos_token_id)
    # Decode only the newly generated tokens, not the echoed prompt
    response = tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
    return response.strip()
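
# The handler above ignores the chat history that gr.ChatInterface passes in. A
# minimal sketch of a history-aware variant (respond_with_history is a hypothetical
# name; Phi-2 is a base model with no chat template, so the "User:/Assistant:"
# turn format here is an assumption, not an official prompt format):
def respond_with_history(message, history):
    prompt = ""
    for user_msg, bot_msg in history:  # default gradio format: (user, assistant) pairs
        prompt += f"User: {user_msg}\nAssistant: {bot_msg}\n"
    prompt += f"User: {message}\nAssistant:"
    return respond(prompt, history=[])  # reuse the single-turn generator above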
# Gradio chat interface
gr.ChatInterface(
    respond,
    title="🤖 Phi-2 Chatbot",
    description="Ask me anything! Powered by Phi-2.",
    examples=["What's your favorite book?", "Tell me a fun fact about space!"],
    theme="soft",
).launch()
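# Running outside a Space? launch(share=True) serves a temporary public URL;
# on Hugging Face Spaces the plain launch() above is all that's needed.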