import streamlit as st
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TextStreamer
import torch
import bitsandbytes as bnb  # backend transformers uses for 4-bit loading; imported so a missing install fails early
# Check if CUDA is available, and decide on the device
device = "cuda" if torch.cuda.is_available() else "cpu"
# Load the tokenizer and the 4-bit quantized LLaMA model
model_name = "unsloth/Llama-3.2-1B-Instruct-bnb-4bit"
tokenizer = AutoTokenizer.from_pretrained(model_name)
if device == "cuda":
    # Newer transformers releases expect quantization options via
    # BitsAndBytesConfig; passing load_in_4bit= directly is deprecated
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=BitsAndBytesConfig(load_in_4bit=True),
        device_map="auto",
    )
else:
    # bitsandbytes 4-bit kernels need CUDA; fall back to full precision on CPU
    model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
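# A minimal sketch (hedged) of the usual Streamlit caching pattern for this
# step: wrapping the load in a function decorated with st.cache_resource, so
# reruns reuse the same objects instead of reloading the weights every time
# (the load_model name is illustrative, not part of the original app):
#
#     @st.cache_resource
#     def load_model():
#         tok = AutoTokenizer.from_pretrained(model_name)
#         mdl = AutoModelForCausalLM.from_pretrained(
#             model_name,
#             quantization_config=BitsAndBytesConfig(load_in_4bit=True),
#             device_map="auto",
#         )
#         return tok, mdl
#
#     tokenizer, model = load_model()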
# Streamlit interface
st.title("Keyword Extractor using LLaMA 4-bit Model")
# Text input area for user input
user_input = st.text_area("Enter text for keyword extraction")
if user_input:
    # Build the keyword-extraction prompt around the user's text
    prompt_template = (
        "Extract keywords and variables from the prompt:\n"
        "{}\n"
    )
    prompt = prompt_template.format(user_input)
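    # Hedged alternative: Llama-3.2 Instruct checkpoints are trained with a
    # chat template, so routing the request through apply_chat_template
    # usually yields cleaner answers than a raw string prompt. An untested
    # sketch of that variant:
    #
    #     messages = [{"role": "user", "content": prompt}]
    #     chat_ids = tokenizer.apply_chat_template(
    #         messages, add_generation_prompt=True, return_tensors="pt"
    #     ).to(device)
    #     output = model.generate(chat_ids, max_new_tokens=128)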
    # Tokenize the input text and move it to the model's device
    inputs = tokenizer([prompt], return_tensors="pt").to(device)
    # TextStreamer prints tokens to the server console (stdout) as they are
    # generated; it does not stream into the Streamlit page itself
    text_streamer = TextStreamer(tokenizer)
    # Generate keywords and variables without tracking gradients
    with torch.no_grad():
        output = model.generate(**inputs, streamer=text_streamer, max_new_tokens=128)
    # Decode only the newly generated tokens, dropping the echoed prompt
    prompt_length = inputs["input_ids"].shape[1]
    generated_text = tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True)
    # Display the result in the Streamlit app
    st.write("Extracted Keywords and Variables:")
    st.write(generated_text)
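    # Possible extension (untested sketch): stream tokens into the page itself
    # rather than the server console, using TextIteratorStreamer together with
    # st.write_stream (available in recent Streamlit releases). Generation
    # runs in a background thread while the main thread renders tokens:
    #
    #     from threading import Thread
    #     from transformers import TextIteratorStreamer
    #
    #     streamer = TextIteratorStreamer(
    #         tokenizer, skip_prompt=True, skip_special_tokens=True
    #     )
    #     thread = Thread(
    #         target=model.generate,
    #         kwargs=dict(**inputs, streamer=streamer, max_new_tokens=128),
    #     )
    #     thread.start()
    #     st.write_stream(streamer)  # renders each decoded chunk as it arrives
    #     thread.join()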