Spaces:
Sleeping
Sleeping
SHAMIL SHAHBAZ AWAN
committed on
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,25 +1,37 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
|
| 3 |
import torch
|
|
|
|
| 4 |
|
| 5 |
-
# Load the
|
| 6 |
@st.cache_resource()
|
| 7 |
def load_model():
|
| 8 |
MODEL_NAME = "codellama/CodeLlama-7b-hf" # Model name
|
| 9 |
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
)
|
|
|
|
| 15 |
return pipeline("text-generation", model=model, tokenizer=tokenizer)
|
| 16 |
|
| 17 |
-
# Initialize
|
| 18 |
code_generator = load_model()
|
| 19 |
|
| 20 |
# Streamlit UI
|
| 21 |
st.title("CodeLlama-7B Code Bot π")
|
| 22 |
-
st.subheader("Generate code snippets using CodeLlama-7b-hf
|
| 23 |
|
| 24 |
# User input
|
| 25 |
prompt = st.text_area("Enter a coding prompt (e.g., 'Write a Python function to sort a list'): ")
|
|
@@ -38,7 +50,7 @@ if st.button("Generate Code"):
|
|
| 38 |
num_return_sequences=1
|
| 39 |
)
|
| 40 |
generated_code = response[0]['generated_text']
|
| 41 |
-
# Display the code output
|
| 42 |
st.code(generated_code, language="python") # Change language as needed
|
| 43 |
except Exception as e:
|
| 44 |
st.error(f"Error: {str(e)}")
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
|
| 3 |
import torch
|
| 4 |
+
from accelerate import init_empty_weights, load_checkpoint_and_dispatch
|
| 5 |
|
| 6 |
+
# Load the model using Accelerate for memory optimization
|
| 7 |
@st.cache_resource()
def load_model():
    """Build the CodeLlama text-generation pipeline for CPU inference.

    The function is wrapped in ``st.cache_resource`` so the pipeline it
    returns is cached by Streamlit rather than rebuilt on each call.

    Returns:
        A ``transformers`` ``"text-generation"`` pipeline built from the
        model and tokenizer named by ``MODEL_NAME``.
    """
    MODEL_NAME = "codellama/CodeLlama-7b-hf"  # Model name
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

    # Load the weights with a single from_pretrained call.
    # Wrapping from_pretrained in init_empty_weights() and then calling
    # load_checkpoint_and_dispatch(model, MODEL_NAME, ...) does not work:
    # that function expects a local checkpoint file/folder, not a Hub repo
    # id. low_cpu_mem_usage=True already gives the memory-efficient load,
    # and with no device_map the model stays on the CPU.
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        torch_dtype=torch.float32,  # Use float32 for CPU
        low_cpu_mem_usage=True,  # Enable low memory usage on CPU
    )

    return pipeline("text-generation", model=model, tokenizer=tokenizer)
|
| 28 |
|
| 29 |
+
# Initialize the model
|
| 30 |
code_generator = load_model()
|
| 31 |
|
| 32 |
# Streamlit UI
|
| 33 |
st.title("CodeLlama-7B Code Bot π")
|
| 34 |
+
st.subheader("Generate code snippets using CodeLlama-7b-hf optimized for CPU")
|
| 35 |
|
| 36 |
# User input
|
| 37 |
prompt = st.text_area("Enter a coding prompt (e.g., 'Write a Python function to sort a list'): ")
|
|
|
|
| 50 |
num_return_sequences=1
|
| 51 |
)
|
| 52 |
generated_code = response[0]['generated_text']
|
| 53 |
+
# Display the generated code output
|
| 54 |
st.code(generated_code, language="python") # Change language as needed
|
| 55 |
except Exception as e:
|
| 56 |
st.error(f"Error: {str(e)}")
|