Update llm_backend.py
llm_backend.py  +4 -7
@@ -34,14 +34,13 @@ class LlmBackend:
     def is_model_loaded(self):
         return self._model is not None
 
-    def load_model(self, model_path, context_size=2000, enable_gpu=True, gpu_layer_number=35,
+    def load_model(self, model_path, context_size=2000, enable_gpu=True, gpu_layer_number=35, chat_format='llama-2'):
         log.info('load_model - started')
         self._model_params = {}
         self._model_params['model_path'] = model_path
         self._model_params['context_size'] = context_size
         self._model_params['enable_gpu'] = enable_gpu
         self._model_params['gpu_layer_number'] = gpu_layer_number
-        self._model_params['n_gqa'] = n_gqa
         self._model_params['chat_format'] = chat_format
 
         if self._model is not None:
@@ -57,9 +56,8 @@ class LlmBackend:
             #n_batch=100,
             logits_all=True,
             #n_threads=12,
-            verbose=
-            n_gpu_layers=gpu_layer_number
-            n_gqa=n_gqa #must be set for 70b models
+            verbose=True,
+            n_gpu_layers=gpu_layer_number
         )
         log.info('load_model - finished')
         return self._model
@@ -72,8 +70,7 @@ class LlmBackend:
             #n_batch=100,
             logits_all=True,
             #n_threads=12,
-            verbose=
-            n_gqa=n_gqa #must be set for 70b models
+            verbose=True
         )
         log.info('load_model - finished')
         return self._model
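
For context, here is a minimal sketch of how the whole method might look with both hunks applied. It assumes the backend wraps llama-cpp-python's Llama class and that enable_gpu selects between the two construction sites shown in the diff; the __init__, the n_ctx/chat_format wiring, the body of the reload branch, and the logging setup are reconstructions from the surrounding context lines, not code confirmed by the commit.

import logging

from llama_cpp import Llama

log = logging.getLogger(__name__)


class LlmBackend:
    def __init__(self):
        self._model = None
        self._model_params = {}

    def is_model_loaded(self):
        return self._model is not None

    def load_model(self, model_path, context_size=2000, enable_gpu=True,
                   gpu_layer_number=35, chat_format='llama-2'):
        log.info('load_model - started')
        # Record the parameters, mirroring the bookkeeping in the diff
        # (minus the removed n_gqa entry).
        self._model_params = {
            'model_path': model_path,
            'context_size': context_size,
            'enable_gpu': enable_gpu,
            'gpu_layer_number': gpu_layer_number,
            'chat_format': chat_format,
        }
        if self._model is not None:
            # Drop any previously loaded model first; the body of this
            # branch is not visible in the diff, so this line is a guess.
            self._model = None
        if enable_gpu:
            # GPU path: offload gpu_layer_number layers to the GPU.
            self._model = Llama(
                model_path=model_path,
                n_ctx=context_size,
                chat_format=chat_format,
                logits_all=True,
                verbose=True,
                n_gpu_layers=gpu_layer_number,
            )
        else:
            # CPU path: identical except for the layer offload.
            self._model = Llama(
                model_path=model_path,
                n_ctx=context_size,
                chat_format=chat_format,
                logits_all=True,
                verbose=True,
            )
        log.info('load_model - finished')
        return self._model

The substance of the change: newer llama.cpp builds read the grouped-query-attention configuration from the model file's metadata, so the n_gqa argument that the old comment flagged as "must be set for 70b models" is no longer needed and is dropped from the signature, the stored parameters, and both Llama(...) calls, while verbose=True turns on llama.cpp's load and timing output. A caller that previously passed n_gqa for a 70B model now simply omits it, e.g. backend.load_model('models/llama-2-70b.Q4_K_M.gguf', context_size=4096) (hypothetical path).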
|