import streamlit as st
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer
import torch
import bitsandbytes  # not used directly, but importing it fails fast if the 4-bit backend is missing
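# A Space running this app must also declare its dependencies (e.g. in
# requirements.txt). The list below is a sketch, not pinned versions;
# accelerate is needed because device_map is used when loading the model:
#
#     streamlit
#     torch
#     transformers
#     accelerate
#     bitsandbytes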
# Check if CUDA is available and decide on the device
device = "cuda" if torch.cuda.is_available() else "cpu"
# Load the tokenizer for the pre-quantized LLaMA checkpoint
model_name = "unsloth/Llama-3.2-1B-Instruct-bnb-4bit"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Load the model. The checkpoint is already quantized to 4-bit with
# bitsandbytes, so its stored quantization config is applied automatically;
# passing load_in_4bit=True again is redundant (and may conflict with the
# stored config on some transformers versions). Note that bitsandbytes
# 4-bit weights require a CUDA GPU to run.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto" if device == "cuda" else {"": "cpu"},  # spread layers across GPUs if available, else keep on CPU
)
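# (Optional) Starting from a full-precision checkpoint instead, 4-bit
# quantization is requested explicitly at load time. A minimal sketch,
# assuming a CUDA GPU; the checkpoint name below is illustrative:
#
#     from transformers import BitsAndBytesConfig
#
#     bnb_config = BitsAndBytesConfig(
#         load_in_4bit=True,                     # quantize weights to 4-bit on load
#         bnb_4bit_quant_type="nf4",             # NormalFloat4 data type
#         bnb_4bit_compute_dtype=torch.float16,  # dtype used for matmuls
#     )
#     model = AutoModelForCausalLM.from_pretrained(
#         "meta-llama/Llama-3.2-1B-Instruct",    # assumed full-precision checkpoint
#         quantization_config=bnb_config,
#         device_map="auto",
#     )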
# Streamlit interface
st.title("Keyword Extractor using LLaMA 4-bit Model")

# Text input area for user input
user_input = st.text_area("Enter text for keyword extraction")
if user_input:
    # Prepare the prompt for keyword extraction
    prompt = (
        "Extract keywords and variables from the prompt:\n"
        "{}\n"
    ).format(user_input)

    # Tokenize the input text and move it to the model's device
    inputs = tokenizer([prompt], return_tensors="pt").to(device)
    # Set up a streamer; TextStreamer prints tokens to the server console
    # (stdout) as they are generated, not into the Streamlit page itself
    text_streamer = TextStreamer(tokenizer, skip_prompt=True)

    # Generate the keywords and extracted variables
    with torch.no_grad():
        output = model.generate(**inputs, streamer=text_streamer, max_new_tokens=128)
    # Decode only the newly generated tokens; output[0] also contains the
    # prompt, so slice it off before displaying
    generated_text = tokenizer.decode(
        output[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True
    )

    # Display the result in the Streamlit app
    st.write("Extracted Keywords and Variables:")
    st.write(generated_text)
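Since Llama-3.2-1B-Instruct is a chat-tuned model, wrapping the request in the tokenizer's chat template usually yields cleaner extractions than a raw string prompt. A minimal sketch of the tokenization step above, using the standard apply_chat_template API; the message wording is illustrative and the rest of the app is unchanged:

messages = [
    {"role": "user",
     "content": f"Extract keywords and variables from this text:\n{user_input}"}
]
# apply_chat_template returns a tensor of input_ids when return_tensors="pt"
input_ids = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,  # append the assistant header so the model answers
    return_tensors="pt",
).to(device)
output = model.generate(input_ids, streamer=text_streamer, max_new_tokens=128)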