ngxson (HF staff) committed
Commit 27d54e6
1 parent: feaa097

better isolation, various improvements

Files changed (3):
  1. .dockerignore +2 -1
  2. .gitignore +1 -0
  3. app.py +219 -169
.dockerignore CHANGED
@@ -1,2 +1,3 @@
 /downloads
-/llama.cpp
+/llama.cpp
+/outputs
.gitignore CHANGED
@@ -164,3 +164,4 @@ cython_debug/
 /downloads
 !/downloads/.keep
 /llama.cpp
+/outputs
app.py CHANGED
@@ -12,21 +12,34 @@ from textwrap import dedent
 from apscheduler.schedulers.background import BackgroundScheduler
 
 
+# used for restarting the space
 HF_TOKEN = os.environ.get("HF_TOKEN")
-
-def generate_importance_matrix(model_path, train_data_path):
-    imatrix_command = f"./llama-imatrix -m ../{model_path} -f {train_data_path} -ngl 99 --output-frequency 10"
-
-    os.chdir("llama.cpp")
-
-    print(f"Current working directory: {os.getcwd()}")
-    print(f"Files in the current directory: {os.listdir('.')}")
-
-    if not os.path.isfile(f"../{model_path}"):
+CONVERSION_SCRIPT = "./llama.cpp/convert_hf_to_gguf.py"
+
+# escape HTML for logging
+def escape(s: str) -> str:
+    s = s.replace("&", "&amp;") # Must be done first!
+    s = s.replace("<", "&lt;")
+    s = s.replace(">", "&gt;")
+    s = s.replace('"', "&quot;")
+    s = s.replace("\n", "<br/>")
+    return s
+
+def generate_importance_matrix(model_path: str, train_data_path: str, output_path: str):
+    imatrix_command = [
+        "./llama.cpp/llama-imatrix",
+        "-m", model_path,
+        "-f", train_data_path,
+        "-ngl", "99",
+        "--output-frequency", "10",
+        "-o", output_path,
+    ]
+
+    if not os.path.isfile(model_path):
         raise Exception(f"Model file not found: {model_path}")
 
     print("Running imatrix command...")
-    process = subprocess.Popen(imatrix_command, shell=True)
+    process = subprocess.Popen(imatrix_command, shell=False)
 
     try:
         process.wait(timeout=60) # added wait
@@ -39,36 +52,54 @@ def generate_importance_matrix(model_path, train_data_path):
         print("Imatrix proc still didn't term. Forecfully terming process...")
         process.kill()
 
-    os.chdir("..")
-
     print("Importance matrix generation completed.")
 
-def split_upload_model(model_path, repo_id, oauth_token: gr.OAuthToken | None, split_max_tensors=256, split_max_size=None):
+def split_upload_model(model_path: str, outdir: str, repo_id: str, oauth_token: gr.OAuthToken | None, split_max_tensors=256, split_max_size=None):
+    print(f"Model path: {model_path}")
+    print(f"Output dir: {outdir}")
+
     if oauth_token.token is None:
         raise ValueError("You have to be logged in.")
 
-    split_cmd = f"llama.cpp/llama-gguf-split --split --split-max-tensors {split_max_tensors}"
+    split_cmd = [
+        "./llama.cpp/llama-gguf-split",
+        "--split",
+    ]
     if split_max_size:
-        split_cmd += f" --split-max-size {split_max_size}"
-    split_cmd += f" {model_path} {model_path.split('.')[0]}"
-
+        split_cmd.append("--split-max-size")
+        split_cmd.append(split_max_size)
+    else:
+        split_cmd.append("--split-max-tensors")
+        split_cmd.append(str(split_max_tensors))
+
+    # args for output
+    model_path_prefix = '.'.join(model_path.split('.')[:-1]) # remove the file extension
+    split_cmd.append(model_path)
+    split_cmd.append(model_path_prefix)
+
     print(f"Split command: {split_cmd}")
 
-    result = subprocess.run(split_cmd, shell=True, capture_output=True, text=True)
+    result = subprocess.run(split_cmd, shell=False, capture_output=True, text=True)
     print(f"Split command stdout: {result.stdout}")
    print(f"Split command stderr: {result.stderr}")
 
     if result.returncode != 0:
-        raise Exception(f"Error splitting the model: {result.stderr}")
+        stderr_str = result.stderr.decode("utf-8")
+        raise Exception(f"Error splitting the model: {stderr_str}")
     print("Model split successfully!")
-
-
-    sharded_model_files = [f for f in os.listdir('.') if f.startswith(model_path.split('.')[0])]
+
+    # remove the original model file if needed
+    if os.path.exists(model_path):
+        os.remove(model_path)
+
+    model_file_prefix = model_path_prefix.split('/')[-1]
+    print(f"Model file name prefix: {model_file_prefix}")
+    sharded_model_files = [f for f in os.listdir(outdir) if f.startswith(model_file_prefix) and f.endswith(".gguf")]
     if sharded_model_files:
         print(f"Sharded model files: {sharded_model_files}")
         api = HfApi(token=oauth_token.token)
         for file in sharded_model_files:
-            file_path = os.path.join('.', file)
+            file_path = os.path.join(outdir, file)
             print(f"Uploading file: {file_path}")
             try:
                 api.upload_file(
@@ -87,7 +118,6 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
     if oauth_token.token is None:
         raise ValueError("You must be logged in to use GGUF-my-repo")
     model_name = model_id.split('/')[-1]
-    fp16 = f"{model_name}.fp16.gguf"
 
     try:
         api = HfApi(token=oauth_token.token)
@@ -108,160 +138,180 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
 
         dl_pattern += [pattern]
 
-        with tempfile.TemporaryDirectory(dir="downloads") as tmpdir:
-            # Keep the model name as the dirname so the model name metadata is populated correctly
-            local_dir = Path(tmpdir)/model_name
-            print(local_dir)
-            api.snapshot_download(repo_id=model_id, local_dir=local_dir, local_dir_use_symlinks=False, allow_patterns=dl_pattern)
-            print("Model downloaded successfully!")
-            print(f"Current working directory: {os.getcwd()}")
-            print(f"Model directory contents: {os.listdir(local_dir)}")
-
-            config_dir = local_dir/"config.json"
-            adapter_config_dir = local_dir/"adapter_config.json"
-            if os.path.exists(adapter_config_dir) and not os.path.exists(config_dir):
-                raise Exception('adapter_config.json is present.<br/><br/>If you are converting a LoRA adapter to GGUF, please use <a href="https://huggingface.co/spaces/ggml-org/gguf-my-lora" target="_blank" style="text-decoration:underline">GGUF-my-lora</a>.')
-
-            conversion_script = "convert_hf_to_gguf.py"
-            fp16_conversion = f"python llama.cpp/{conversion_script} {local_dir} --outtype f16 --outfile {fp16}"
-            result = subprocess.run(fp16_conversion, shell=True, capture_output=True)
-            print(result)
+        if not os.path.exists("downloads"):
+            os.makedirs("downloads")
+
+        if not os.path.exists("outputs"):
+            os.makedirs("outputs")
+
+        with tempfile.TemporaryDirectory(dir="outputs") as outdir:
+            fp16 = str(Path(outdir)/f"{model_name}.fp16.gguf")
+
+            with tempfile.TemporaryDirectory(dir="downloads") as tmpdir:
+                # Keep the model name as the dirname so the model name metadata is populated correctly
+                local_dir = Path(tmpdir)/model_name
+                print(local_dir)
+                api.snapshot_download(repo_id=model_id, local_dir=local_dir, local_dir_use_symlinks=False, allow_patterns=dl_pattern)
+                print("Model downloaded successfully!")
+                print(f"Current working directory: {os.getcwd()}")
+                print(f"Model directory contents: {os.listdir(local_dir)}")
+
+                config_dir = local_dir/"config.json"
+                adapter_config_dir = local_dir/"adapter_config.json"
+                if os.path.exists(adapter_config_dir) and not os.path.exists(config_dir):
+                    raise Exception('adapter_config.json is present.<br/><br/>If you are converting a LoRA adapter to GGUF, please use <a href="https://huggingface.co/spaces/ggml-org/gguf-my-lora" target="_blank" style="text-decoration:underline">GGUF-my-lora</a>.')
+
+                result = subprocess.run([
+                    "python", CONVERSION_SCRIPT, local_dir, "--outtype", "f16", "--outfile", fp16
+                ], shell=False, capture_output=True)
+                print(result)
+                if result.returncode != 0:
+                    stderr_str = result.stderr.decode("utf-8")
+                    raise Exception(f"Error converting to fp16: {stderr_str}")
+                print("Model converted to fp16 successfully!")
+                print(f"Converted model path: {fp16}")
+
+            imatrix_path = Path(outdir)/"imatrix.dat"
+
+            if use_imatrix:
+                if train_data_file:
+                    train_data_path = train_data_file.name
+                else:
+                    train_data_path = "llama.cpp/groups_merged.txt" #fallback calibration dataset
+
+                print(f"Training data file path: {train_data_path}")
+
+                if not os.path.isfile(train_data_path):
+                    raise Exception(f"Training data file not found: {train_data_path}")
+
+                generate_importance_matrix(fp16, train_data_path, imatrix_path)
+            else:
+                print("Not using imatrix quantization.")
+
+            # Quantize the model
+            quantized_gguf_name = f"{model_name.lower()}-{imatrix_q_method.lower()}-imat.gguf" if use_imatrix else f"{model_name.lower()}-{q_method.lower()}.gguf"
+            quantized_gguf_path = str(Path(outdir)/quantized_gguf_name)
+            if use_imatrix:
+                quantise_ggml = [
+                    "./llama.cpp/llama-quantize",
+                    "--imatrix", imatrix_path, fp16, quantized_gguf_path, imatrix_q_method
+                ]
+            else:
+                quantise_ggml = [
+                    "./llama.cpp/llama-quantize",
+                    fp16, quantized_gguf_path, q_method
+                ]
+            result = subprocess.run(quantise_ggml, shell=False, capture_output=True)
             if result.returncode != 0:
-                raise Exception(f"Error converting to fp16: {result.stderr}")
-            print("Model converted to fp16 successfully!")
-            print(f"Converted model path: {fp16}")
+                stderr_str = result.stderr.decode("utf-8")
+                raise Exception(f"Error quantizing: {stderr_str}")
+            print(f"Quantized successfully with {imatrix_q_method if use_imatrix else q_method} option!")
+            print(f"Quantized model path: {quantized_gguf_path}")
 
-        imatrix_path = "llama.cpp/imatrix.dat"
+            # Create empty repo
+            username = whoami(oauth_token.token)["name"]
+            new_repo_url = api.create_repo(repo_id=f"{username}/{model_name}-{imatrix_q_method if use_imatrix else q_method}-GGUF", exist_ok=True, private=private_repo)
+            new_repo_id = new_repo_url.repo_id
+            print("Repo created successfully!", new_repo_url)
 
-        if use_imatrix:
-            if train_data_file:
-                train_data_path = train_data_file.name
-            else:
-                train_data_path = "groups_merged.txt" #fallback calibration dataset
-
-            print(f"Training data file path: {train_data_path}")
-
-            if not os.path.isfile(train_data_path):
-                raise Exception(f"Training data file not found: {train_data_path}")
-
-            generate_importance_matrix(fp16, train_data_path)
-        else:
-            print("Not using imatrix quantization.")
-        username = whoami(oauth_token.token)["name"]
-        quantized_gguf_name = f"{model_name.lower()}-{imatrix_q_method.lower()}-imat.gguf" if use_imatrix else f"{model_name.lower()}-{q_method.lower()}.gguf"
-        quantized_gguf_path = quantized_gguf_name
-        if use_imatrix:
-            quantise_ggml = f"./llama.cpp/llama-quantize --imatrix {imatrix_path} {fp16} {quantized_gguf_path} {imatrix_q_method}"
-        else:
-            quantise_ggml = f"./llama.cpp/llama-quantize {fp16} {quantized_gguf_path} {q_method}"
-        result = subprocess.run(quantise_ggml, shell=True, capture_output=True)
-        if result.returncode != 0:
-            raise Exception(f"Error quantizing: {result.stderr}")
-        print(f"Quantized successfully with {imatrix_q_method if use_imatrix else q_method} option!")
-        print(f"Quantized model path: {quantized_gguf_path}")
-
-        # Create empty repo
-        new_repo_url = api.create_repo(repo_id=f"{username}/{model_name}-{imatrix_q_method if use_imatrix else q_method}-GGUF", exist_ok=True, private=private_repo)
-        new_repo_id = new_repo_url.repo_id
-        print("Repo created successfully!", new_repo_url)
+            try:
+                card = ModelCard.load(model_id, token=oauth_token.token)
+            except:
+                card = ModelCard("")
+            if card.data.tags is None:
+                card.data.tags = []
+            card.data.tags.append("llama-cpp")
+            card.data.tags.append("gguf-my-repo")
+            card.data.base_model = model_id
+            card.text = dedent(
+                f"""
+                # {new_repo_id}
+                This model was converted to GGUF format from [`{model_id}`](https://huggingface.co/{model_id}) using llama.cpp via the ggml.ai's [GGUF-my-repo](https://huggingface.co/spaces/ggml-org/gguf-my-repo) space.
+                Refer to the [original model card](https://huggingface.co/{model_id}) for more details on the model.
+
+                ## Use with llama.cpp
+                Install llama.cpp through brew (works on Mac and Linux)
+
+                ```bash
+                brew install llama.cpp
+
+                ```
+                Invoke the llama.cpp server or the CLI.
+
+                ### CLI:
+                ```bash
+                llama-cli --hf-repo {new_repo_id} --hf-file {quantized_gguf_name} -p "The meaning to life and the universe is"
+                ```
+
+                ### Server:
+                ```bash
+                llama-server --hf-repo {new_repo_id} --hf-file {quantized_gguf_name} -c 2048
+                ```
+
+                Note: You can also use this checkpoint directly through the [usage steps](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#usage) listed in the Llama.cpp repo as well.
+
+                Step 1: Clone llama.cpp from GitHub.
+                ```
+                git clone https://github.com/ggerganov/llama.cpp
+                ```
+
+                Step 2: Move into the llama.cpp folder and build it with `LLAMA_CURL=1` flag along with other hardware-specific flags (for ex: LLAMA_CUDA=1 for Nvidia GPUs on Linux).
+                ```
+                cd llama.cpp && LLAMA_CURL=1 make
+                ```
+
+                Step 3: Run inference through the main binary.
+                ```
+                ./llama-cli --hf-repo {new_repo_id} --hf-file {quantized_gguf_name} -p "The meaning to life and the universe is"
+                ```
+                or
+                ```
+                ./llama-server --hf-repo {new_repo_id} --hf-file {quantized_gguf_name} -c 2048
+                ```
+                """
+            )
+            readme_path = Path(outdir)/"README.md"
+            card.save(readme_path)
 
-        try:
-            card = ModelCard.load(model_id, token=oauth_token.token)
-        except:
-            card = ModelCard("")
-        if card.data.tags is None:
-            card.data.tags = []
-        card.data.tags.append("llama-cpp")
-        card.data.tags.append("gguf-my-repo")
-        card.data.base_model = model_id
-        card.text = dedent(
-            f"""
-            # {new_repo_id}
-            This model was converted to GGUF format from [`{model_id}`](https://huggingface.co/{model_id}) using llama.cpp via the ggml.ai's [GGUF-my-repo](https://huggingface.co/spaces/ggml-org/gguf-my-repo) space.
-            Refer to the [original model card](https://huggingface.co/{model_id}) for more details on the model.
-
-            ## Use with llama.cpp
-            Install llama.cpp through brew (works on Mac and Linux)
-
-            ```bash
-            brew install llama.cpp
-
-            ```
-            Invoke the llama.cpp server or the CLI.
-
-            ### CLI:
-            ```bash
-            llama-cli --hf-repo {new_repo_id} --hf-file {quantized_gguf_name} -p "The meaning to life and the universe is"
-            ```
-
-            ### Server:
-            ```bash
-            llama-server --hf-repo {new_repo_id} --hf-file {quantized_gguf_name} -c 2048
-            ```
-
-            Note: You can also use this checkpoint directly through the [usage steps](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#usage) listed in the Llama.cpp repo as well.
-
-            Step 1: Clone llama.cpp from GitHub.
-            ```
-            git clone https://github.com/ggerganov/llama.cpp
-            ```
-
-            Step 2: Move into the llama.cpp folder and build it with `LLAMA_CURL=1` flag along with other hardware-specific flags (for ex: LLAMA_CUDA=1 for Nvidia GPUs on Linux).
-            ```
-            cd llama.cpp && LLAMA_CURL=1 make
-            ```
-
-            Step 3: Run inference through the main binary.
-            ```
-            ./llama-cli --hf-repo {new_repo_id} --hf-file {quantized_gguf_name} -p "The meaning to life and the universe is"
-            ```
-            or
-            ```
-            ./llama-server --hf-repo {new_repo_id} --hf-file {quantized_gguf_name} -c 2048
-            ```
-            """
-        )
-        card.save(f"README.md")
-
-        if split_model:
-            split_upload_model(quantized_gguf_path, new_repo_id, oauth_token, split_max_tensors, split_max_size)
-        else:
-            try:
-                print(f"Uploading quantized model: {quantized_gguf_path}")
-                api.upload_file(
-                    path_or_fileobj=quantized_gguf_path,
-                    path_in_repo=quantized_gguf_name,
-                    repo_id=new_repo_id,
-                )
-            except Exception as e:
-                raise Exception(f"Error uploading quantized model: {e}")
-
-
-        imatrix_path = "llama.cpp/imatrix.dat"
-        if os.path.isfile(imatrix_path):
-            try:
-                print(f"Uploading imatrix.dat: {imatrix_path}")
-                api.upload_file(
-                    path_or_fileobj=imatrix_path,
-                    path_in_repo="imatrix.dat",
-                    repo_id=new_repo_id,
-                )
-            except Exception as e:
-                raise Exception(f"Error uploading imatrix.dat: {e}")
+            if split_model:
+                split_upload_model(str(quantized_gguf_path), outdir, new_repo_id, oauth_token, split_max_tensors, split_max_size)
+            else:
+                try:
+                    print(f"Uploading quantized model: {quantized_gguf_path}")
+                    api.upload_file(
+                        path_or_fileobj=quantized_gguf_path,
+                        path_in_repo=quantized_gguf_name,
+                        repo_id=new_repo_id,
+                    )
+                except Exception as e:
+                    raise Exception(f"Error uploading quantized model: {e}")
 
-        api.upload_file(
-            path_or_fileobj=f"README.md",
-            path_in_repo=f"README.md",
-            repo_id=new_repo_id,
-        )
-        print(f"Uploaded successfully with {imatrix_q_method if use_imatrix else q_method} option!")
+            if os.path.isfile(imatrix_path):
+                try:
+                    print(f"Uploading imatrix.dat: {imatrix_path}")
+                    api.upload_file(
+                        path_or_fileobj=imatrix_path,
+                        path_in_repo="imatrix.dat",
+                        repo_id=new_repo_id,
+                    )
+                except Exception as e:
+                    raise Exception(f"Error uploading imatrix.dat: {e}")
+
+            api.upload_file(
+                path_or_fileobj=readme_path,
+                path_in_repo="README.md",
+                repo_id=new_repo_id,
+            )
+            print(f"Uploaded successfully with {imatrix_q_method if use_imatrix else q_method} option!")
 
+        # end of the TemporaryDirectory(dir="outputs") block; temporary outputs are deleted here
+
         return (
-            f'<h1>✅ DONE</h1><br/><br/>Find your repo here: <a href="{new_repo_url}" target="_blank" style="text-decoration:underline">{new_repo_id}</a>',
+            f'<h1>✅ DONE</h1><br/>Find your repo here: <a href="{new_repo_url}" target="_blank" style="text-decoration:underline">{new_repo_id}</a>',
             "llama.png",
         )
     except Exception as e:
-        return (f"<h1>❌ ERROR</h1><br/><br/>{e}", "error.png")
+        return (f'<h1>❌ ERROR</h1><br/><pre style="white-space:pre-wrap;">{escape(str(e))}</pre>', "error.png")
 
 
 css="""/* Custom CSS to allow scrolling */
@@ -329,7 +379,7 @@ with gr.Blocks(css=css) as demo:
 
         split_max_size = gr.Textbox(
             label="Max File Size",
-            info="Maximum file size when splitting model (--split-max-size). May leave empty to use the default.",
+            info="Maximum file size when splitting model (--split-max-size). May leave empty to use the default. Accepted suffixes: M, G. Example: 256M, 5G",
            visible=False
         )
 
385