import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline


# Load the model once and cache it across Streamlit reruns.
@st.cache_resource()
def load_model():
    MODEL_NAME = "codellama/CodeLlama-7b-hf"
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

    # low_cpu_mem_usage=True uses Accelerate under the hood so the weights are
    # streamed into place instead of building a second full copy in RAM.
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        torch_dtype=torch.float32,  # float32 for CPU inference
        low_cpu_mem_usage=True,     # memory-friendly loading via Accelerate
        device_map="cpu",           # keep all weights on the CPU
    )

    return pipeline("text-generation", model=model, tokenizer=tokenizer)


# Initialize the model
code_generator = load_model()

# Streamlit UI
st.title("CodeLlama-7B Code Bot 🚀")
st.subheader("Generate code snippets using CodeLlama-7b-hf optimized for CPU")

# User input
prompt = st.text_area("Enter a coding prompt (e.g., 'Write a Python function to sort a list'):")

# Generate Code
if st.button("Generate Code"):
    if prompt.strip():
        st.info("Generating code... Please wait ⏳")
        try:
            # Generate code with the CodeLlama model
            response = code_generator(
                prompt,
                max_length=512,          # increase for longer code generation
                temperature=0.2,         # lower temperature for more deterministic output
                do_sample=True,          # enable sampling
                num_return_sequences=1,
            )
            generated_code = response[0]["generated_text"]

            # Display the generated code
            st.code(generated_code, language="python")  # change language as needed
        except Exception as e:
            st.error(f"Error: {str(e)}")
    else:
        st.warning("Please enter a prompt.")

st.caption("Powered by CodeLlama-7B | Streamlit UI | CPU Optimized")
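
# Usage sketch (assumptions: this file is saved as app.py and the listed
# packages are not yet installed; adjust names/versions to your environment):
#
#   pip install streamlit transformers torch accelerate
#   streamlit run app.py
#
# The first run downloads the ~13 GB CodeLlama-7b-hf checkpoint from the
# Hugging Face Hub, so expect a long initial startup on CPU-only machines.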