muryshev committed on
Commit
77d95ba
1 Parent(s): ad9c449

Update llm_backend.py

Browse files
Files changed (1) hide show
  1. llm_backend.py +4 -7
llm_backend.py CHANGED
@@ -34,14 +34,13 @@ class LlmBackend:
34
  def is_model_loaded(self):
35
  return self._model is not None
36
 
37
- def load_model(self, model_path, context_size=2000, enable_gpu=True, gpu_layer_number=35, n_gqa=8, chat_format='llama-2'):
38
  log.info('load_model - started')
39
  self._model_params = {}
40
  self._model_params['model_path'] = model_path
41
  self._model_params['context_size'] = context_size
42
  self._model_params['enable_gpu'] = enable_gpu
43
  self._model_params['gpu_layer_number'] = gpu_layer_number
44
- self._model_params['n_gqa'] = n_gqa
45
  self._model_params['chat_format'] = chat_format
46
 
47
  if self._model is not None:
@@ -57,9 +56,8 @@ class LlmBackend:
57
  #n_batch=100,
58
  logits_all=True,
59
  #n_threads=12,
60
- verbose=False,
61
- n_gpu_layers=gpu_layer_number,
62
- n_gqa=n_gqa #must be set for 70b models
63
  )
64
  log.info('load_model - finished')
65
  return self._model
@@ -72,8 +70,7 @@ class LlmBackend:
72
  #n_batch=100,
73
  logits_all=True,
74
  #n_threads=12,
75
- verbose=False,
76
- n_gqa=n_gqa #must be set for 70b models
77
  )
78
  log.info('load_model - finished')
79
  return self._model
 
34
  def is_model_loaded(self):
35
  return self._model is not None
36
 
37
+ def load_model(self, model_path, context_size=2000, enable_gpu=True, gpu_layer_number=35, chat_format='llama-2'):
38
  log.info('load_model - started')
39
  self._model_params = {}
40
  self._model_params['model_path'] = model_path
41
  self._model_params['context_size'] = context_size
42
  self._model_params['enable_gpu'] = enable_gpu
43
  self._model_params['gpu_layer_number'] = gpu_layer_number
 
44
  self._model_params['chat_format'] = chat_format
45
 
46
  if self._model is not None:
 
56
  #n_batch=100,
57
  logits_all=True,
58
  #n_threads=12,
59
+ verbose=True,
60
+ n_gpu_layers=gpu_layer_number
 
61
  )
62
  log.info('load_model - finished')
63
  return self._model
 
70
  #n_batch=100,
71
  logits_all=True,
72
  #n_threads=12,
73
+ verbose=True
 
74
  )
75
  log.info('load_model - finished')
76
  return self._model