# Code-Bro / app.py
import streamlit as st
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch

# Load the model once and cache it across Streamlit reruns.
# low_cpu_mem_usage relies on the accelerate package being installed,
# but nothing from it needs to be imported directly.
@st.cache_resource()
def load_model():
    MODEL_NAME = "codellama/CodeLlama-7b-hf"
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    # low_cpu_mem_usage=True streams checkpoint shards straight into the
    # model instead of first materialising a randomly initialised copy,
    # which lowers peak RAM while loading.
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        torch_dtype=torch.float32,  # float32 for CPU inference
        low_cpu_mem_usage=True,
    )
    # The model stays on CPU by default; wrap it in a text-generation pipeline
    return pipeline("text-generation", model=model, tokenizer=tokenizer)
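
# Note: the first run downloads the CodeLlama-7B checkpoint (roughly 13 GB on
# disk) and holds it in RAM as float32 (roughly 28 GB for 7B parameters at
# 4 bytes each), so a machine with ample memory is assumed.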
# Initialize the model
code_generator = load_model()
# Streamlit UI
st.title("CodeLlama-7B Code Bot πŸš€")
st.subheader("Generate code snippets with CodeLlama-7b-hf, running entirely on CPU")
# User input
prompt = st.text_area("Enter a coding prompt (e.g., 'Write a Python function to sort a list'): ")
# Generate code
if st.button("Generate Code"):
    if prompt.strip():
        st.info("Generating code... Please wait ⏳")
        try:
            # Generate a completion with the CodeLlama model
            response = code_generator(
                prompt,
                max_new_tokens=512,      # token budget for the completion, not counting the prompt
                temperature=0.2,         # low temperature for more deterministic output
                do_sample=True,          # sample instead of greedy decoding
                num_return_sequences=1,
            )
            # generated_text contains the prompt followed by the completion
            generated_code = response[0]["generated_text"]
            st.code(generated_code, language="python")  # change language as needed
        except Exception as e:
            st.error(f"Error: {e}")
    else:
        st.warning("Please enter a prompt.")
st.caption("Powered by CodeLlama-7B | Streamlit UI | CPU Optimized")
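
# To run the app locally (assumes streamlit, transformers, torch and
# accelerate are installed in the environment):
#   streamlit run app.py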