SHAMIL SHAHBAZ AWAN committed
Commit ac26bd5 · verified · 1 Parent(s): 947bd50

Update app.py

Files changed (1)
  1. app.py +20 -8
app.py CHANGED
@@ -1,25 +1,37 @@
 import streamlit as st
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 import torch
+from accelerate import init_empty_weights, load_checkpoint_and_dispatch
 
-# Load the CodeLlama model and tokenizer
+# Load the model using Accelerate for memory optimization
 @st.cache_resource()
 def load_model():
     MODEL_NAME = "codellama/CodeLlama-7b-hf"  # Model name
     tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-    model = AutoModelForCausalLM.from_pretrained(
-        MODEL_NAME,
-        torch_dtype=torch.float32,  # Use float32 for CPU
-        device_map="cpu"  # Map model to CPU
+
+    # Load model with accelerate to optimize for memory usage
+    with init_empty_weights():
+        model = AutoModelForCausalLM.from_pretrained(
+            MODEL_NAME,
+            torch_dtype=torch.float32,  # Use float32 for CPU
+            low_cpu_mem_usage=True  # Enable low memory usage on CPU
+        )
+
+    # Move model to CPU
+    model = load_checkpoint_and_dispatch(
+        model,
+        MODEL_NAME,
+        device_map="cpu",  # Load model onto CPU
     )
+
     return pipeline("text-generation", model=model, tokenizer=tokenizer)
 
-# Initialize pipeline
+# Initialize the model
 code_generator = load_model()
 
 # Streamlit UI
 st.title("CodeLlama-7B Code Bot 🚀")
-st.subheader("Generate code snippets using CodeLlama-7b-hf on CPU")
+st.subheader("Generate code snippets using CodeLlama-7b-hf optimized for CPU")
 
 # User input
 prompt = st.text_area("Enter a coding prompt (e.g., 'Write a Python function to sort a list'): ")
@@ -38,7 +50,7 @@ if st.button("Generate Code"):
             num_return_sequences=1
         )
         generated_code = response[0]['generated_text']
-        # Display the code output
+        # Display the generated code output
         st.code(generated_code, language="python")  # Change language as needed
     except Exception as e:
         st.error(f"Error: {str(e)}")
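
Note on the pattern this commit introduces: in current Accelerate releases, load_checkpoint_and_dispatch expects a path to a local checkpoint file or sharded-checkpoint folder, not a Hub model ID such as MODEL_NAME, so the dispatch call as committed may fail at runtime. A minimal alternative sketch (not part of this commit, assuming a recent transformers version where from_pretrained drives the low-memory load through Accelerate internally):

# Sketch only, not from this commit: a single from_pretrained call can
# usually replace the explicit init_empty_weights / dispatch pair.
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

MODEL_NAME = "codellama/CodeLlama-7b-hf"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float32,  # full precision for CPU inference
    low_cpu_mem_usage=True,     # stream weights in shards to cap peak RAM
    device_map="cpu",           # keep every module on the CPU
)
code_generator = pipeline("text-generation", model=model, tokenizer=tokenizer)

If the dispatch-based path is kept instead, the checkpoint would first need to be materialized locally (e.g. with huggingface_hub.snapshot_download) so the second argument to load_checkpoint_and_dispatch points at an actual folder on disk.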