Update app.py
app.py CHANGED
@@ -16,9 +16,13 @@ adapter_path = hf_hub_download(repo_id=adapter_repo, filename=adapter_file)
 llm = Llama(
     model_path=base_model_path,
     lora_path=adapter_path,
-    n_ctx=
-    n_threads=
-    n_gpu_layers=0 #
+    n_ctx=256,           # Reduced context length (adjust based on your needs)
+    n_threads=8,         # Use all available CPU cores (adjust based on your system)
+    n_gpu_layers=0,      # 0 = CPU-only; set >0 to offload layers to the GPU (adjust based on VRAM)
+    use_mmap=False,      # Disable memory mapping for faster access
+    use_mlock=True,      # Lock model in memory for stability
+    logits_all=False,    # Only compute logits for the last token
+    embedding=False      # Embedding mode is not needed for text generation
 )
 
 # Define the translation function
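For context, a minimal, self-contained sketch of how the updated initialization might sit in app.py. Only the Llama(...) arguments come from this commit; the repo IDs, GGUF filenames, prompt format, and the translate() helper below are illustrative assumptions, not part of the committed code.

from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Assumed repo IDs and filenames -- app.py defines its own values above the hunk shown here.
base_model_path = hf_hub_download(
    repo_id="example-org/base-model-GGUF",   # hypothetical base model repo
    filename="base-model.Q4_K_M.gguf",       # hypothetical quantized GGUF file
)
adapter_repo = "example-org/lora-adapter"    # hypothetical adapter repo
adapter_file = "lora-adapter.gguf"           # hypothetical adapter file
adapter_path = hf_hub_download(repo_id=adapter_repo, filename=adapter_file)

# Initialization as committed above: CPU-only, small context window, model pinned in RAM.
llm = Llama(
    model_path=base_model_path,
    lora_path=adapter_path,
    n_ctx=256,
    n_threads=8,
    n_gpu_layers=0,
    use_mmap=False,
    use_mlock=True,
    logits_all=False,
    embedding=False,
)

# Define the translation function (prompt format and default language are assumptions).
def translate(text, target_lang="French"):
    prompt = f"Translate the following text into {target_lang}:\n{text}\nTranslation:"
    result = llm(prompt, max_tokens=128, temperature=0.2, stop=["\n"])
    return result["choices"][0]["text"].strip()

Note that with n_ctx=256 the prompt plus max_tokens must fit within 256 tokens, so longer inputs would require raising the context length again.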