seanpedrickcase committed
Commit aba68bf · 1 Parent(s): 714810a

Loading llama_cpp inside load_model instead of on app load

Files changed (1)
  1. tools/llm_funcs.py +4 -5
tools/llm_funcs.py CHANGED
@@ -21,11 +21,6 @@ tokenizer = list() #[] # Define empty list for model functions to run
 from tools.config import RUN_AWS_FUNCTIONS, AWS_REGION, LLM_TEMPERATURE, LLM_TOP_K, LLM_MIN_P, LLM_TOP_P, LLM_REPETITION_PENALTY, LLM_LAST_N_TOKENS, LLM_MAX_NEW_TOKENS, LLM_SEED, LLM_RESET, LLM_STREAM, LLM_THREADS, LLM_BATCH_SIZE, LLM_CONTEXT_LENGTH, LLM_SAMPLE, MAX_TOKENS, TIMEOUT_WAIT, NUMBER_OF_RETRY_ATTEMPTS, MAX_TIME_FOR_LOOP, BATCH_SIZE_DEFAULT, DEDUPLICATION_THRESHOLD, MAX_COMMENT_CHARS, RUN_LOCAL_MODEL, CHOSEN_LOCAL_MODEL_TYPE, LOCAL_REPO_ID, LOCAL_MODEL_FILE, LOCAL_MODEL_FOLDER, HF_TOKEN, LLM_SEED, LLM_MAX_GPU_LAYERS, SPECULATIVE_DECODING, NUM_PRED_TOKENS
 from tools.prompts import initial_table_assistant_prefill
 
-if RUN_LOCAL_MODEL == "1":
-    print("Running local model - importing llama-cpp-python")
-    from llama_cpp import Llama
-    from llama_cpp.llama_speculative import LlamaPromptLookupDecoding
-
 if SPECULATIVE_DECODING == "True": SPECULATIVE_DECODING = True
 else: SPECULATIVE_DECODING = False
 
@@ -225,6 +220,10 @@ def load_model(local_model_type:str=CHOSEN_LOCAL_MODEL_TYPE,
     # Verify the device and cuda settings
     # Check if CUDA is enabled
     import torch
+    #if RUN_LOCAL_MODEL == "1":
+    #print("Running local model - importing llama-cpp-python")
+    from llama_cpp import Llama
+    from llama_cpp.llama_speculative import LlamaPromptLookupDecoding
 
     torch.cuda.empty_cache()
     print("Is CUDA enabled? ", torch.cuda.is_available())