johnpaulbin committed on
Commit
e0d55af
·
verified ·
1 Parent(s): 6509da1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -3
app.py CHANGED
@@ -16,9 +16,13 @@ adapter_path = hf_hub_download(repo_id=adapter_repo, filename=adapter_file)
16
  llm = Llama(
17
  model_path=base_model_path,
18
  lora_path=adapter_path,
19
- n_ctx=512, # Context length, set manually since adapter lacks it
20
- n_threads=2, # Adjust based on your system
21
- n_gpu_layers=0 # Set to >0 if GPU acceleration is desired and supported
 
 
 
 
22
  )
23
 
24
  # Define the translation function
 
16
  llm = Llama(
17
  model_path=base_model_path,
18
  lora_path=adapter_path,
19
+ n_ctx=256, # Reduced context length (adjust based on your needs)
20
+ n_threads=8, # Number of CPU threads to use (adjust to match your system's core count)
21
+ n_gpu_layers=0, # CPU only; set to >0 to offload layers to the GPU (adjust based on VRAM)
22
+ use_mmap=False, # Load the whole model into RAM instead of memory-mapping the file
23
+ use_mlock=True, # Lock model in memory for stability
24
+ logits_all=False, # Only compute necessary logits
25
+ embedding=False # Disable embedding mode; the model is used for text generation only
26
  )
27
 
28
  # Define the translation function