MekkCyber commited on
Commit
00178b2
·
1 Parent(s): 7bf7dc3

final maybe

Browse files
Files changed (2) hide show
  1. app.py +221 -54
  2. app_claude.py +385 -457
app.py CHANGED
@@ -1,6 +1,6 @@
1
  import gradio as gr
2
  import torch
3
- from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModel, BitsAndBytesConfig
4
  import tempfile
5
  from huggingface_hub import HfApi
6
  from huggingface_hub import list_models
@@ -17,14 +17,17 @@ def hello(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToken | None) ->
17
  return "Hello Please Login to HuggingFace to use the BitsAndBytes Quantizer!"
18
  return f"Hello {profile.name} ! Welcome to BitsAndBytes Quantizer"
19
 
20
- def check_model_exists(oauth_token: gr.OAuthToken | None, username, model_name, quantized_model_name):
 
 
 
21
  """Check if a model exists in the user's Hugging Face repository."""
22
  try:
23
  models = list_models(author=username, token=oauth_token.token)
24
  model_names = [model.id for model in models]
25
- if quantized_model_name :
26
  repo_name = f"{username}/{quantized_model_name}"
27
- else :
28
  repo_name = f"{username}/{model_name.split('/')[-1]}-bnb-4bit"
29
 
30
  if repo_name in model_names:
@@ -34,7 +37,10 @@ def check_model_exists(oauth_token: gr.OAuthToken | None, username, model_name,
34
  except Exception as e:
35
  return f"Error checking model existence: {str(e)}"
36
 
37
- def create_model_card(model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4):
 
 
 
38
  model_card = f"""---
39
  base_model:
40
  - {model_name}
@@ -58,23 +64,31 @@ You can use this model in your applications by loading it directly from the Hugg
58
  from transformers import AutoModel
59
 
60
  model = AutoModel.from_pretrained("{model_name}")"""
61
-
62
  return model_card
63
 
64
- def load_model(model_name, quantization_config, auth_token) :
65
- return AutoModel.from_pretrained(model_name, quantization_config=quantization_config, device_map="cpu", use_auth_token=auth_token.token)
66
 
67
  DTYPE_MAPPING = {
68
  "int8": torch.int8,
69
  "uint8": torch.uint8,
70
  "float16": torch.float16,
71
- "float32": torch.float32,
72
  "bfloat16": torch.bfloat16,
73
  }
74
 
75
 
76
- def quantize_model(model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, auth_token=None):
77
- print(f"Quantizing model: {quant_type_4}")
 
 
 
 
 
 
 
 
 
 
78
  quantization_config = BitsAndBytesConfig(
79
  load_in_4bit=True,
80
  bnb_4bit_quant_type=quant_type_4,
@@ -83,61 +97,114 @@ def quantize_model(model_name, quant_type_4, double_quant_4, compute_type_4, qua
83
  bnb_4bit_compute_dtype=DTYPE_MAPPING[compute_type_4],
84
  )
85
 
86
- model = AutoModel.from_pretrained(model_name, quantization_config=quantization_config, device_map="cpu", use_auth_token=auth_token.token, torch_dtype=torch.bfloat16)
87
- for _ , module in model.named_modules():
 
 
 
 
 
 
 
 
 
 
 
88
  if isinstance(module, Linear4bit):
89
  module.to("cuda")
90
  module.to("cpu")
 
 
 
91
  return model
92
 
93
- def save_model(model, model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, username=None, auth_token=None, quantized_model_name=None, public=False):
94
- print("Saving quantized model")
95
- with tempfile.TemporaryDirectory() as tmpdirname:
96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
 
98
- model.save_pretrained(tmpdirname, safe_serialization=True, use_auth_token=auth_token.token)
99
- if quantized_model_name :
100
  repo_name = f"{username}/{quantized_model_name}"
101
- else :
102
  repo_name = f"{username}/{model_name.split('/')[-1]}-bnb-4bit"
103
-
104
 
105
- model_card = create_model_card(repo_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4)
 
 
106
  with open(os.path.join(tmpdirname, "README.md"), "w") as f:
107
  f.write(model_card)
 
 
108
  # Push to Hub
109
  api = HfApi(token=auth_token.token)
110
  api.create_repo(repo_name, exist_ok=True, private=not public)
 
 
 
111
  api.upload_folder(
112
  folder_path=tmpdirname,
113
  repo_id=repo_name,
114
  repo_type="model",
115
  )
 
 
116
  # Get model architecture as string
117
  import io
118
  from contextlib import redirect_stdout
119
  import html
120
-
121
  # Capture the model architecture string
122
  f = io.StringIO()
123
  with redirect_stdout(f):
124
  print(model)
125
  model_architecture_str = f.getvalue()
126
-
127
  # Escape HTML characters and format with line breaks
128
- model_architecture_str_html = html.escape(model_architecture_str).replace('\n', '<br/>')
129
-
 
 
130
  # Format it for display in markdown with proper styling
131
  model_architecture_info = f"""
132
  <div class="model-architecture" style="max-height: 500px; overflow-y: auto; overflow-x: auto; background-color: #f5f5f5; padding: 5px; border-radius: 8px; font-family: monospace; white-space: pre-wrap;">
133
  <div style="line-height: 1.2; font-size: 0.75em;">{model_architecture_str_html}</div>
134
  </div>
135
  """
136
-
137
  return f'🔗 Quantized Model <br/><h1> 🤗 DONE</h1><br/>Find your repo here: <a href="https://huggingface.co/{repo_name}" target="_blank" style="text-decoration:underline">{repo_name}</a><br/><br/>📊 Model Architecture<br/>{model_architecture_info}'
138
 
139
- def quantize_and_save(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToken | None, model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, quantized_model_name, public):
140
- if oauth_token is None :
 
 
 
 
 
 
 
 
 
 
 
 
141
  return """
142
  <div class="error-box">
143
  <h3>❌ Authentication Error</h3>
@@ -150,9 +217,11 @@ def quantize_and_save(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToke
150
  <h3>❌ Authentication Error</h3>
151
  <p>Please sign in to your HuggingFace account to use the quantizer.</p>
152
  </div>
153
- """
154
- exists_message = check_model_exists(oauth_token, profile.username, model_name, quantized_model_name)
155
- if exists_message :
 
 
156
  return f"""
157
  <div class="warning-box">
158
  <h3>⚠️ Model Already Exists</h3>
@@ -160,10 +229,35 @@ def quantize_and_save(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToke
160
  </div>
161
  """
162
  try:
163
- quantized_model = quantize_model(model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, oauth_token)
164
- return save_model(quantized_model, model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, profile.username, oauth_token, quantized_model_name, public)
165
- except Exception as e :
166
- error_message = str(e).replace('\n', '<br/>')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167
  return f"""
168
  <div class="error-box">
169
  <h3>❌ Error Occurred</h3>
@@ -172,7 +266,7 @@ def quantize_and_save(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToke
172
  """
173
 
174
 
175
- css="""/* Custom CSS to allow scrolling */
176
  .gradio-container {overflow-y: auto;}
177
 
178
  /* Fix alignment for radio buttons and checkboxes */
@@ -342,7 +436,40 @@ button[variant="primary"]:hover {
342
  #toggle-button:hover::after {
343
  left: 100%;
344
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
345
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
346
  """
347
 
348
 
@@ -358,8 +485,8 @@ with gr.Blocks(theme=gr.themes.Ocean(), css=css) as demo:
358
 
359
  m1 = gr.Markdown()
360
  demo.load(hello, inputs=None, outputs=m1)
361
-
362
- instructions_visible = gr.State(False)
363
 
364
  with gr.Row():
365
  with gr.Column():
@@ -370,7 +497,7 @@ with gr.Blocks(theme=gr.themes.Ocean(), css=css) as demo:
370
  search_type="model",
371
  )
372
  with gr.Row():
373
- with gr.Column():
374
  gr.Markdown(
375
  """
376
  ### ⚙️ Model Quantization Type Settings
@@ -381,21 +508,21 @@ with gr.Blocks(theme=gr.themes.Ocean(), css=css) as demo:
381
  choices=["fp4", "nf4"],
382
  value="nf4",
383
  visible=True,
384
- show_label=False
385
  )
386
  compute_type_4 = gr.Dropdown(
387
  info="The compute type for the model",
388
  choices=["float16", "bfloat16", "float32"],
389
  value="bfloat16",
390
  visible=True,
391
- show_label=False
392
  )
393
  quant_storage_4 = gr.Dropdown(
394
  info="The storage type for the model",
395
  choices=["float16", "float32", "int8", "uint8", "bfloat16"],
396
  value="uint8",
397
  visible=True,
398
- show_label=False
399
  )
400
  gr.Markdown(
401
  """
@@ -404,11 +531,11 @@ with gr.Blocks(theme=gr.themes.Ocean(), css=css) as demo:
404
  )
405
  with gr.Row(elem_classes="option-row"):
406
  double_quant_4 = gr.Radio(
407
- ["True", "False"],
408
- info="Use Double Quant",
409
- visible=True,
410
  value="True",
411
- show_label=False
412
  )
413
  gr.Markdown(
414
  """
@@ -424,26 +551,66 @@ with gr.Blocks(theme=gr.themes.Ocean(), css=css) as demo:
424
  elem_classes="model-name-textbox",
425
  show_label=False,
426
  )
427
-
428
  with gr.Row():
429
  public = gr.Checkbox(
430
  label="🌐 Make model public",
431
  info="If checked, the model will be publicly accessible",
432
  value=True,
433
  interactive=True,
434
- show_label=True
435
  )
436
 
437
  with gr.Column():
438
- quantize_button = gr.Button("🚀 Quantize and Push to the Hub", variant="primary")
439
- output_link = gr.Markdown("🔗 Quantized Model", container=True, min_height=100)
440
-
 
 
 
 
441
  quantize_button.click(
442
  fn=quantize_and_save,
443
- inputs=[model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, quantized_model_name, public],
444
- outputs=[output_link]
 
 
 
 
 
 
 
 
 
445
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
446
 
447
  if __name__ == "__main__":
448
  demo.launch(share=True)
449
-
 
1
  import gradio as gr
2
  import torch
3
+ from transformers import AutoModel, BitsAndBytesConfig
4
  import tempfile
5
  from huggingface_hub import HfApi
6
  from huggingface_hub import list_models
 
17
  return "Hello Please Login to HuggingFace to use the BitsAndBytes Quantizer!"
18
  return f"Hello {profile.name} ! Welcome to BitsAndBytes Quantizer"
19
 
20
+
21
+ def check_model_exists(
22
+ oauth_token: gr.OAuthToken | None, username, model_name, quantized_model_name
23
+ ):
24
  """Check if a model exists in the user's Hugging Face repository."""
25
  try:
26
  models = list_models(author=username, token=oauth_token.token)
27
  model_names = [model.id for model in models]
28
+ if quantized_model_name:
29
  repo_name = f"{username}/{quantized_model_name}"
30
+ else:
31
  repo_name = f"{username}/{model_name.split('/')[-1]}-bnb-4bit"
32
 
33
  if repo_name in model_names:
 
37
  except Exception as e:
38
  return f"Error checking model existence: {str(e)}"
39
 
40
+
41
+ def create_model_card(
42
+ model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4
43
+ ):
44
  model_card = f"""---
45
  base_model:
46
  - {model_name}
 
64
  from transformers import AutoModel
65
 
66
  model = AutoModel.from_pretrained("{model_name}")"""
67
+
68
  return model_card
69
 
 
 
70
 
71
  DTYPE_MAPPING = {
72
  "int8": torch.int8,
73
  "uint8": torch.uint8,
74
  "float16": torch.float16,
75
+ "float32": torch.float32,
76
  "bfloat16": torch.bfloat16,
77
  }
78
 
79
 
80
+ def quantize_model(
81
+ model_name,
82
+ quant_type_4,
83
+ double_quant_4,
84
+ compute_type_4,
85
+ quant_storage_4,
86
+ auth_token=None,
87
+ progress=gr.Progress(),
88
+ ):
89
+ progress(0, desc="Loading model")
90
+
91
+ # Configure quantization
92
  quantization_config = BitsAndBytesConfig(
93
  load_in_4bit=True,
94
  bnb_4bit_quant_type=quant_type_4,
 
97
  bnb_4bit_compute_dtype=DTYPE_MAPPING[compute_type_4],
98
  )
99
 
100
+ # Load model
101
+ model = AutoModel.from_pretrained(
102
+ model_name,
103
+ quantization_config=quantization_config,
104
+ device_map="cpu",
105
+ use_auth_token=auth_token.token,
106
+ torch_dtype=torch.bfloat16,
107
+ )
108
+ progress(0.33, desc="Quantizing")
109
+
110
+ # Quantize model
111
+ modules = list(model.named_modules())
112
+ for idx, (_, module) in enumerate(modules):
113
  if isinstance(module, Linear4bit):
114
  module.to("cuda")
115
  module.to("cpu")
116
+ progress(0.33 + (0.33 * idx / len(modules)), desc="Quantizing")
117
+
118
+ progress(0.66, desc="Quantized successfully")
119
  return model
120
 
 
 
 
121
 
122
+ def save_model(
123
+ model,
124
+ model_name,
125
+ quant_type_4,
126
+ double_quant_4,
127
+ compute_type_4,
128
+ quant_storage_4,
129
+ username=None,
130
+ auth_token=None,
131
+ quantized_model_name=None,
132
+ public=False,
133
+ progress=gr.Progress(),
134
+ ):
135
+ progress(0.67, desc="Preparing to push")
136
+
137
+ with tempfile.TemporaryDirectory() as tmpdirname:
138
+ # Save model
139
+ model.save_pretrained(
140
+ tmpdirname, safe_serialization=True, use_auth_token=auth_token.token
141
+ )
142
+ progress(0.75, desc="Preparing to push")
143
 
144
+ # Prepare repo name and model card
145
+ if quantized_model_name:
146
  repo_name = f"{username}/{quantized_model_name}"
147
+ else:
148
  repo_name = f"{username}/{model_name.split('/')[-1]}-bnb-4bit"
 
149
 
150
+ model_card = create_model_card(
151
+ repo_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4
152
+ )
153
  with open(os.path.join(tmpdirname, "README.md"), "w") as f:
154
  f.write(model_card)
155
+ progress(0.80, desc="Model card created")
156
+
157
  # Push to Hub
158
  api = HfApi(token=auth_token.token)
159
  api.create_repo(repo_name, exist_ok=True, private=not public)
160
+ progress(0.85, desc="Pushing to Hub")
161
+
162
+ # Upload files
163
  api.upload_folder(
164
  folder_path=tmpdirname,
165
  repo_id=repo_name,
166
  repo_type="model",
167
  )
168
+ progress(1.00, desc="Model pushed to Hub")
169
+
170
  # Get model architecture as string
171
  import io
172
  from contextlib import redirect_stdout
173
  import html
174
+
175
  # Capture the model architecture string
176
  f = io.StringIO()
177
  with redirect_stdout(f):
178
  print(model)
179
  model_architecture_str = f.getvalue()
180
+
181
  # Escape HTML characters and format with line breaks
182
+ model_architecture_str_html = html.escape(model_architecture_str).replace(
183
+ "\n", "<br/>"
184
+ )
185
+
186
  # Format it for display in markdown with proper styling
187
  model_architecture_info = f"""
188
  <div class="model-architecture" style="max-height: 500px; overflow-y: auto; overflow-x: auto; background-color: #f5f5f5; padding: 5px; border-radius: 8px; font-family: monospace; white-space: pre-wrap;">
189
  <div style="line-height: 1.2; font-size: 0.75em;">{model_architecture_str_html}</div>
190
  </div>
191
  """
 
192
  return f'🔗 Quantized Model <br/><h1> 🤗 DONE</h1><br/>Find your repo here: <a href="https://huggingface.co/{repo_name}" target="_blank" style="text-decoration:underline">{repo_name}</a><br/><br/>📊 Model Architecture<br/>{model_architecture_info}'
193
 
194
+
195
+ def quantize_and_save(
196
+ profile: gr.OAuthProfile | None,
197
+ oauth_token: gr.OAuthToken | None,
198
+ model_name,
199
+ quant_type_4,
200
+ double_quant_4,
201
+ compute_type_4,
202
+ quant_storage_4,
203
+ quantized_model_name,
204
+ public,
205
+ progress=gr.Progress(),
206
+ ):
207
+ if oauth_token is None:
208
  return """
209
  <div class="error-box">
210
  <h3>❌ Authentication Error</h3>
 
217
  <h3>❌ Authentication Error</h3>
218
  <p>Please sign in to your HuggingFace account to use the quantizer.</p>
219
  </div>
220
+ """
221
+ exists_message = check_model_exists(
222
+ oauth_token, profile.username, model_name, quantized_model_name
223
+ )
224
+ if exists_message:
225
  return f"""
226
  <div class="warning-box">
227
  <h3>⚠️ Model Already Exists</h3>
 
229
  </div>
230
  """
231
  try:
232
+ # Download phase
233
+ progress(0, desc="Starting quantization process")
234
+ quantized_model = quantize_model(
235
+ model_name,
236
+ quant_type_4,
237
+ double_quant_4,
238
+ compute_type_4,
239
+ quant_storage_4,
240
+ oauth_token,
241
+ progress,
242
+ )
243
+ final_message = save_model(
244
+ quantized_model,
245
+ model_name,
246
+ quant_type_4,
247
+ double_quant_4,
248
+ compute_type_4,
249
+ quant_storage_4,
250
+ profile.username,
251
+ oauth_token,
252
+ quantized_model_name,
253
+ public,
254
+ progress,
255
+ )
256
+
257
+ return final_message
258
+
259
+ except Exception as e:
260
+ error_message = str(e).replace("\n", "<br/>")
261
  return f"""
262
  <div class="error-box">
263
  <h3>❌ Error Occurred</h3>
 
266
  """
267
 
268
 
269
+ css = """/* Custom CSS to allow scrolling */
270
  .gradio-container {overflow-y: auto;}
271
 
272
  /* Fix alignment for radio buttons and checkboxes */
 
436
  #toggle-button:hover::after {
437
  left: 100%;
438
  }
439
+ /* Progress Bar Styles */
440
+ .progress-container {
441
+ font-family: system-ui, -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
442
+ padding: 20px;
443
+ background: white;
444
+ border-radius: 12px;
445
+ box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
446
+ }
447
+
448
+ .progress-stage {
449
+ font-size: 0.9rem;
450
+ font-weight: 600;
451
+ color: #64748b;
452
+ }
453
 
454
+ .progress-stage .stage {
455
+ position: relative;
456
+ padding: 8px 12px;
457
+ border-radius: 6px;
458
+ background: #f1f5f9;
459
+ transition: all 0.3s ease;
460
+ }
461
+
462
+ .progress-stage .stage.completed {
463
+ background: #ecfdf5;
464
+ }
465
+
466
+ .progress-bar {
467
+ box-shadow: inset 0 2px 4px rgba(0, 0, 0, 0.1);
468
+ }
469
+ .progress {
470
+ transition: width 0.8s cubic-bezier(0.4, 0, 0.2, 1);
471
+ box-shadow: 0 2px 4px rgba(59, 130, 246, 0.3);
472
+ }
473
  """
474
 
475
 
 
485
 
486
  m1 = gr.Markdown()
487
  demo.load(hello, inputs=None, outputs=m1)
488
+
489
+ instructions_visible = gr.State(False)
490
 
491
  with gr.Row():
492
  with gr.Column():
 
497
  search_type="model",
498
  )
499
  with gr.Row():
500
+ with gr.Column():
501
  gr.Markdown(
502
  """
503
  ### ⚙️ Model Quantization Type Settings
 
508
  choices=["fp4", "nf4"],
509
  value="nf4",
510
  visible=True,
511
+ show_label=False,
512
  )
513
  compute_type_4 = gr.Dropdown(
514
  info="The compute type for the model",
515
  choices=["float16", "bfloat16", "float32"],
516
  value="bfloat16",
517
  visible=True,
518
+ show_label=False,
519
  )
520
  quant_storage_4 = gr.Dropdown(
521
  info="The storage type for the model",
522
  choices=["float16", "float32", "int8", "uint8", "bfloat16"],
523
  value="uint8",
524
  visible=True,
525
+ show_label=False,
526
  )
527
  gr.Markdown(
528
  """
 
531
  )
532
  with gr.Row(elem_classes="option-row"):
533
  double_quant_4 = gr.Radio(
534
+ ["True", "False"],
535
+ info="Use Double Quant",
536
+ visible=True,
537
  value="True",
538
+ show_label=False,
539
  )
540
  gr.Markdown(
541
  """
 
551
  elem_classes="model-name-textbox",
552
  show_label=False,
553
  )
554
+
555
  with gr.Row():
556
  public = gr.Checkbox(
557
  label="🌐 Make model public",
558
  info="If checked, the model will be publicly accessible",
559
  value=True,
560
  interactive=True,
561
+ show_label=True,
562
  )
563
 
564
  with gr.Column():
565
+ quantize_button = gr.Button(
566
+ "🚀 Quantize and Push to the Hub", variant="primary"
567
+ )
568
+ output_link = gr.Markdown(
569
+ "🔗 Quantized Model", container=True, min_height=100
570
+ )
571
+
572
  quantize_button.click(
573
  fn=quantize_and_save,
574
+ inputs=[
575
+ model_name,
576
+ quant_type_4,
577
+ double_quant_4,
578
+ compute_type_4,
579
+ quant_storage_4,
580
+ quantized_model_name,
581
+ public,
582
+ ],
583
+ outputs=[output_link],
584
+ show_progress="full",
585
  )
586
+ # Add information section about the app options
587
+ with gr.Accordion("📚 About this app", open=True):
588
+ gr.Markdown(
589
+ """
590
+ ## 📝 Notes on Quantization Options
591
+
592
+ ### Quantization Type (bnb_4bit_quant_type)
593
+ - **fp4**: Floating-point 4-bit quantization.
594
+ - **nf4**: Normal float 4-bit quantization.
595
+
596
+ ### Double Quantization
597
+ - **True**: Applies a second round of quantization to the quantization constants, further reducing memory usage.
598
+ - **False**: Uses standard quantization only.
599
+
600
+ ### Model Saving Options
601
+ - **Model Name**: Custom name for your quantized model on the Hub. If left empty, a default name will be generated.
602
+ - **Make model public**: If checked, anyone can access your quantized model. If unchecked, only you can access it.
603
+
604
+ ## 🔍 How It Works
605
+ This app uses the BitsAndBytes library to perform 4-bit quantization on Transformer models. The process:
606
+ 1. Downloads the original model
607
+ 2. Applies the selected quantization settings
608
+ 3. Uploads the quantized model to your HuggingFace account
609
+
610
+ ## 📊 Memory Usage
611
+ 4-bit quantization can reduce model size by up to 75% compared to FP16, allowing you to run larger models on consumer hardware.
612
+ """
613
+ )
614
 
615
  if __name__ == "__main__":
616
  demo.launch(share=True)
 
app_claude.py CHANGED
@@ -1,6 +1,6 @@
1
  import gradio as gr
2
  import torch
3
- from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModel, BitsAndBytesConfig
4
  import tempfile
5
  from huggingface_hub import HfApi
6
  from huggingface_hub import list_models
@@ -8,12 +8,14 @@ from gradio_huggingfacehub_search import HuggingfaceHubSearch
8
  from bitsandbytes.nn import Linear4bit
9
  from packaging import version
10
  import os
11
-
12
 
13
  def hello(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToken | None) -> str:
 
 
14
  if profile is None:
15
- return "👋 Hello! Sign in to get started with the BitsAndBytes Quantizer."
16
- return f"👋 Hello {profile.name}! Welcome to the BitsAndBytes Quantizer."
17
 
18
  def check_model_exists(oauth_token: gr.OAuthToken | None, username, model_name, quantized_model_name):
19
  """Check if a model exists in the user's Hugging Face repository."""
@@ -23,7 +25,7 @@ def check_model_exists(oauth_token: gr.OAuthToken | None, username, model_name,
23
  if quantized_model_name :
24
  repo_name = f"{username}/{quantized_model_name}"
25
  else :
26
- repo_name = f"{username}/{model_name.split('/')[-1]}-BNB-INT4"
27
 
28
  if repo_name in model_names:
29
  return f"Model '{repo_name}' already exists in your repository."
@@ -59,9 +61,6 @@ model = AutoModel.from_pretrained("{model_name}")"""
59
 
60
  return model_card
61
 
62
- def load_model(model_name, quantization_config, auth_token) :
63
- return AutoModel.from_pretrained(model_name, quantization_config=quantization_config, device_map="cpu", use_auth_token=auth_token.token)
64
-
65
  DTYPE_MAPPING = {
66
  "int8": torch.int8,
67
  "uint8": torch.uint8,
@@ -71,7 +70,9 @@ DTYPE_MAPPING = {
71
  }
72
 
73
 
74
- def quantize_model(model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, auth_token=None):
 
 
75
  print(f"Quantizing model: {quant_type_4}")
76
  quantization_config = BitsAndBytesConfig(
77
  load_in_4bit=True,
@@ -80,9 +81,9 @@ def quantize_model(model_name, quant_type_4, double_quant_4, compute_type_4, qua
80
  bnb_4bit_quant_storage=DTYPE_MAPPING[quant_storage_4],
81
  bnb_4bit_compute_dtype=DTYPE_MAPPING[compute_type_4],
82
  )
 
83
 
84
- model = AutoModel.from_pretrained(model_name, quantization_config=quantization_config, device_map="cpu", use_auth_token=auth_token.token)
85
- for _ , module in model.named_modules():
86
  if isinstance(module, Linear4bit):
87
  module.to("cuda")
88
  module.to("cpu")
@@ -91,12 +92,14 @@ def quantize_model(model_name, quant_type_4, double_quant_4, compute_type_4, qua
91
  def save_model(model, model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, username=None, auth_token=None, quantized_model_name=None, public=False):
92
  print("Saving quantized model")
93
  with tempfile.TemporaryDirectory() as tmpdirname:
 
 
94
  model.save_pretrained(tmpdirname, safe_serialization=True, use_auth_token=auth_token.token)
95
  if quantized_model_name :
96
  repo_name = f"{username}/{quantized_model_name}"
97
  else :
98
- repo_name = f"{username}/{model_name.split('/')[-1]}-BNB-INT4"
99
-
100
  model_card = create_model_card(repo_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4)
101
  with open(os.path.join(tmpdirname, "README.md"), "w") as f:
102
  f.write(model_card)
@@ -108,15 +111,27 @@ def save_model(model, model_name, quant_type_4, double_quant_4, compute_type_4,
108
  repo_id=repo_name,
109
  repo_type="model",
110
  )
111
- return f"""
112
- <div class="success-box">
113
- <h2>🎉 Quantization Complete!</h2>
114
- <p>Your quantized model is now available at:</p>
115
- <a href="https://huggingface.co/{repo_name}" target="_blank" class="model-link">
116
- huggingface.co/{repo_name}
117
- </a>
 
 
 
 
 
 
 
 
 
 
 
118
  </div>
119
  """
 
120
 
121
  def quantize_and_save(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToken | None, model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, quantized_model_name, public):
122
  if oauth_token is None :
@@ -132,7 +147,7 @@ def quantize_and_save(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToke
132
  <h3>❌ Authentication Error</h3>
133
  <p>Please sign in to your HuggingFace account to use the quantizer.</p>
134
  </div>
135
- """
136
  exists_message = check_model_exists(oauth_token, profile.username, model_name, quantized_model_name)
137
  if exists_message :
138
  return f"""
@@ -142,537 +157,450 @@ def quantize_and_save(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToke
142
  </div>
143
  """
144
  try:
 
145
  quantized_model = quantize_model(model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, oauth_token)
146
- return save_model(quantized_model, model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, profile.username, oauth_token, quantized_model_name, public)
 
 
 
147
  except Exception as e :
148
- print(e)
149
  return f"""
150
  <div class="error-box">
151
  <h3>❌ Error Occurred</h3>
152
- <p>{str(e)}</p>
153
  </div>
154
  """
155
 
156
- css = """
157
- :root {
158
- --primary: #6366f1;
159
- --primary-light: #818cf8;
160
- --primary-dark: #4f46e5;
161
- --secondary: #10b981;
162
- --accent: #f97316;
163
- --background: #f8fafc;
164
- --text: #1e293b;
165
- --card-bg: #ffffff;
166
- --input-bg: #f1f5f9;
167
- --error: #ef4444;
168
- --warning: #f59e0b;
169
- --success: #10b981;
170
- --border-radius: 12px;
171
- --shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06);
172
- --transition: all 0.3s ease;
173
- }
174
 
175
- body, .gradio-container {
176
- font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Open Sans', sans-serif;
177
- color: var(--text);
178
- background-color: var(--background);
179
- }
180
-
181
- h1 {
182
- font-size: 2.5rem !important;
183
- font-weight: 800 !important;
184
- text-align: center;
185
- background: linear-gradient(45deg, var(--primary), var(--accent));
186
- -webkit-background-clip: text;
187
- background-clip: text;
188
- color: transparent !important;
189
- margin-bottom: 1rem !important;
190
- padding: 1rem 0 !important;
191
- }
192
 
193
- h2 {
194
- font-size: 1.75rem !important;
195
- font-weight: 700 !important;
196
- color: var(--primary-dark) !important;
197
- margin-top: 1.5rem !important;
198
- margin-bottom: 1rem !important;
199
- }
200
-
201
- h3 {
202
- font-size: 1.25rem !important;
203
- font-weight: 600 !important;
204
- color: var(--primary) !important;
205
- margin-top: 1rem !important;
206
- margin-bottom: 0.5rem !important;
207
- border-bottom: 2px solid var(--primary-light);
208
- padding-bottom: 0.5rem;
209
- width: fit-content;
210
  }
211
 
212
- /* Main container styling */
213
- .main-container {
214
- max-width: 1200px;
215
- margin: 0 auto;
216
- padding: 2rem;
217
- background-color: var(--card-bg);
218
- border-radius: var(--border-radius);
219
- box-shadow: var(--shadow);
220
  }
221
 
222
- /* Button styling */
223
- button {
224
- border-radius: var(--border-radius) !important;
225
- font-weight: 600 !important;
226
- transition: var(--transition) !important;
227
- text-transform: uppercase;
228
- letter-spacing: 0.5px;
229
  }
230
 
231
- button.primary {
232
- background: linear-gradient(135deg, var(--primary), var(--primary-dark)) !important;
233
- border: none !important;
234
- color: white !important;
235
- padding: 12px 24px !important;
236
- box-shadow: 0 4px 6px -1px rgba(99, 102, 241, 0.4) !important;
 
237
  }
238
 
239
- button.primary:hover {
240
- transform: translateY(-2px) !important;
241
- box-shadow: 0 8px 15px -3px rgba(99, 102, 241, 0.5) !important;
242
  }
243
 
244
- /* Login button styling */
245
- #login-button {
246
- margin: 1.5rem auto !important;
247
- min-width: 200px !important;
248
- background: linear-gradient(135deg, var(--primary), var(--primary-dark)) !important;
249
- color: white !important;
250
- font-weight: 600 !important;
251
- padding: 12px 24px !important;
252
- border-radius: var(--border-radius) !important;
253
- border: none !important;
254
- box-shadow: 0 4px 6px -1px rgba(99, 102, 241, 0.4) !important;
255
- transition: var(--transition) !important;
256
  }
257
 
258
- #login-button:hover {
259
- transform: translateY(-2px) !important;
260
- box-shadow: 0 8px 15px -3px rgba(99, 102, 241, 0.5) !important;
261
  }
262
 
263
- /* Toggle button styling */
264
- #toggle-button {
265
- background: transparent !important;
266
- color: var(--primary) !important;
267
- border: 2px solid var(--primary-light) !important;
268
- padding: 8px 16px !important;
269
- margin: 1rem 0 !important;
270
- border-radius: var(--border-radius) !important;
271
- transition: var(--transition) !important;
272
- font-weight: 600 !important;
273
  }
274
 
275
- #toggle-button:hover {
276
- background-color: var(--primary-light) !important;
 
277
  color: white !important;
 
 
 
 
 
 
 
 
 
 
278
  }
279
 
280
- /* Input fields styling */
281
- input, select, textarea {
282
- border-radius: var(--border-radius) !important;
283
- border: 2px solid var(--input-bg) !important;
284
- padding: 10px 16px !important;
285
- background-color: var(--input-bg) !important;
286
- transition: var(--transition) !important;
287
- }
288
-
289
- input:focus, select:focus, textarea:focus {
290
- border-color: var(--primary-light) !important;
291
- box-shadow: 0 0 0 2px rgba(99, 102, 241, 0.2) !important;
292
- }
293
-
294
- /* Dropdown styling with nice hover effects */
295
- .gradio-dropdown > div {
296
- border-radius: var(--border-radius) !important;
297
- border: 2px solid var(--input-bg) !important;
298
- overflow: hidden !important;
299
- transition: var(--transition) !important;
300
- }
301
-
302
- .gradio-dropdown > div:hover {
303
- border-color: var(--primary-light) !important;
304
- }
305
-
306
- /* Radio and checkbox styling */
307
- .gradio-radio, .gradio-checkbox {
308
- background-color: var(--card-bg) !important;
309
- border-radius: var(--border-radius) !important;
310
- padding: 12px !important;
311
- margin-bottom: 16px !important;
312
- transition: var(--transition) !important;
313
- border: 2px solid var(--input-bg) !important;
314
  }
315
 
316
- .gradio-radio:hover, .gradio-checkbox:hover {
317
- border-color: var(--primary-light) !important;
 
318
  }
319
 
320
- .gradio-radio input[type="radio"] + label {
321
- padding: 8px 12px !important;
322
- border-radius: 20px !important;
323
- margin-right: 8px !important;
324
- background-color: var(--input-bg) !important;
325
- transition: var(--transition) !important;
 
326
  }
327
 
328
- .gradio-radio input[type="radio"]:checked + label {
329
- background-color: var(--primary) !important;
 
330
  color: white !important;
 
 
 
 
 
 
 
 
 
 
331
  }
332
 
333
- /* Custom spacing and layout */
334
- .gradio-row {
335
- margin-bottom: 24px !important;
336
- }
337
-
338
- .option-row {
339
- display: flex !important;
340
- gap: 16px !important;
341
- margin-bottom: 16px !important;
342
- }
343
-
344
- /* Card-like sections */
345
- .card-section {
346
- background-color: var(--card-bg) !important;
347
- border-radius: var(--border-radius) !important;
348
- padding: 20px !important;
349
- margin-bottom: 24px !important;
350
- box-shadow: var(--shadow) !important;
351
- border: 1px solid rgba(0, 0, 0, 0.05) !important;
352
- }
353
-
354
- /* Search box styling */
355
- .search-box input {
356
- border-radius: var(--border-radius) !important;
357
- border: 2px solid var(--input-bg) !important;
358
- padding: 12px 20px !important;
359
- box-shadow: var(--shadow) !important;
360
- transition: var(--transition) !important;
361
- }
362
-
363
- .search-box input:focus {
364
- border-color: var(--primary) !important;
365
- box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.3) !important;
366
- }
367
-
368
- /* Model name textbox specific styling */
369
- .model-name-textbox {
370
- border: 2px solid var(--input-bg) !important;
371
- border-radius: var(--border-radius) !important;
372
- transition: var(--transition) !important;
373
- }
374
-
375
- .model-name-textbox:focus-within {
376
- border-color: var(--primary) !important;
377
- box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.3) !important;
378
- }
379
-
380
- /* Success, warning and error boxes */
381
- .success-box, .warning-box, .error-box {
382
- border-radius: var(--border-radius) !important;
383
- padding: 20px !important;
384
- margin: 20px 0 !important;
385
- box-shadow: var(--shadow) !important;
386
- animation: fadeIn 0.5s ease-in-out;
387
  }
388
 
389
- .success-box {
390
- background-color: rgba(16, 185, 129, 0.1) !important;
391
- border: 2px solid var(--success) !important;
392
  }
393
 
394
- .warning-box {
395
- background-color: rgba(245, 158, 11, 0.1) !important;
396
- border: 2px solid var(--warning) !important;
 
 
 
 
 
 
397
  }
398
 
399
- .error-box {
400
- background-color: rgba(239, 68, 68, 0.1) !important;
401
- border: 2px solid var(--error) !important;
402
  }
403
 
404
- /* Model link styling */
405
- .model-link {
406
- display: inline-block !important;
407
- background: linear-gradient(135deg, var(--primary), var(--primary-dark)) !important;
408
  color: white !important;
409
- text-decoration: none !important;
410
- padding: 12px 24px !important;
411
- border-radius: var(--border-radius) !important;
412
  font-weight: 600 !important;
413
- margin-top: 16px !important;
414
- box-shadow: 0 4px 6px -1px rgba(99, 102, 241, 0.4) !important;
415
- transition: var(--transition) !important;
 
 
 
 
 
 
 
 
416
  }
417
 
418
- .model-link:hover {
419
  transform: translateY(-2px) !important;
420
- box-shadow: 0 8px 15px -3px rgba(99, 102, 241, 0.5) !important;
421
- }
422
-
423
- /* Instructions section */
424
- .instructions-container {
425
- background-color: rgba(99, 102, 241, 0.05) !important;
426
- border-left: 4px solid var(--primary) !important;
427
- padding: 16px !important;
428
- margin: 24px 0 !important;
429
- border-radius: 0 var(--border-radius) var(--border-radius) 0 !important;
430
  }
431
 
432
- /* Animations */
433
- @keyframes fadeIn {
434
- from { opacity: 0; transform: translateY(10px); }
435
- to { opacity: 1; transform: translateY(0); }
436
- }
437
-
438
- /* Responsive adjustments */
439
- @media (max-width: 768px) {
440
- .option-row {
441
- flex-direction: column !important;
442
- }
443
- }
444
-
445
- /* Add a nice gradient splash to the app */
446
- .gradio-container::before {
447
  content: "";
448
  position: absolute;
449
  top: 0;
450
- left: 0;
451
- right: 0;
452
- height: 10px;
453
- background: linear-gradient(90deg, var(--primary), var(--accent));
454
- z-index: 100;
455
- }
456
-
457
- /* Stylish header */
458
- .app-header {
459
- display: flex;
460
- flex-direction: column;
461
- align-items: center;
462
- margin-bottom: 2rem;
463
- position: relative;
464
  }
465
 
466
- .app-header::after {
467
- content: "";
468
- position: absolute;
469
- bottom: -10px;
470
- left: 50%;
471
- transform: translateX(-50%);
472
- width: 80px;
473
- height: 4px;
474
- background: linear-gradient(90deg, var(--primary), var(--accent));
475
- border-radius: 2px;
476
  }
477
-
478
- /* Section headers */
479
- .section-header {
480
- display: flex;
481
- align-items: center;
482
- margin-bottom: 1rem;
 
483
  }
484
 
485
- .section-header::before {
486
- content: "⚙️";
487
- margin-right: 8px;
488
- font-size: 1.25rem;
489
  }
490
 
491
- /* Quantize button special styling */
492
- #quantize-button {
493
- background: linear-gradient(135deg, var(--primary), var(--accent)) !important;
494
- color: white !important;
495
- padding: 16px 32px !important;
496
- font-size: 1.1rem !important;
497
- font-weight: 700 !important;
498
- border: none !important;
499
- border-radius: var(--border-radius) !important;
500
- box-shadow: 0 4px 15px -3px rgba(99, 102, 241, 0.5) !important;
501
- transition: all 0.3s cubic-bezier(0.25, 0.8, 0.25, 1) !important;
502
  position: relative;
503
- overflow: hidden;
 
 
 
504
  }
505
 
506
- #quantize-button:hover {
507
- transform: translateY(-3px) !important;
508
- box-shadow: 0 7px 20px -2px rgba(99, 102, 241, 0.6) !important;
509
  }
510
 
511
- #quantize-button::after {
512
- content: "";
513
- position: absolute;
514
- top: 0;
515
- left: 0;
516
- width: 100%;
517
- height: 100%;
518
- background: linear-gradient(rgba(255, 255, 255, 0.2), rgba(255, 255, 255, 0));
519
- transform: translateY(-100%);
520
- transition: transform 0.6s cubic-bezier(0.25, 0.8, 0.25, 1);
521
  }
522
-
523
- #quantize-button:hover::after {
524
- transform: translateY(0);
525
  }
526
  """
527
 
528
- with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="emerald"), css=css) as demo:
529
- with gr.Column(elem_classes="main-container"):
530
- with gr.Row(elem_classes="app-header"):
531
- gr.Markdown(
532
- """
533
- <h1 style="text-align: center; margin-bottom: 1rem; font-size: 1.2rem; color: #4b5563;"> 🤗 BitsAndBytes Model Quantizer</h1>
534
-
535
- <div style="text-align: center; margin-bottom: 1rem; font-size: 1.2rem; color: #4b5563;">
536
- Welcome to the BitsAndBytes Model Quantizer!
537
- </div>
538
- """
539
- )
540
-
541
- gr.LoginButton(elem_id="login-button", elem_classes="login-button")
542
-
543
- welcome_msg = gr.Markdown(elem_classes="welcome-message")
544
- demo.load(hello, inputs=None, outputs=welcome_msg)
545
-
546
- instructions = gr.Markdown(
547
- """
548
- <div class="instructions-container">
549
- <h3>📋 Instructions</h3>
550
- <ol>
551
- <li>Login to your HuggingFace account</li>
552
- <li>Enter the name of the Hugging Face LLM model you want to quantize</li>
553
- <li>Configure quantization settings based on your needs</li>
554
- <li>Optionally, specify a custom name for the quantized model</li>
555
- <li>Click "Quantize Model" to start the process</li>
556
- </ol>
557
- <p><strong>Note:</strong> Processing time depends on model size and your hardware. Check container logs for progress!</p>
558
- </div>
559
- """,
560
- visible=False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
561
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
562
 
563
- instructions_visible = gr.State(False)
564
- toggle_button = gr.Button("▼ Show Instructions", elem_id="toggle-button", elem_classes="toggle-button")
565
 
566
- def toggle_instructions(instructions_visible):
567
- new_visibility = not instructions_visible
568
- new_label = "▲ Hide Instructions" if new_visibility else "▼ Show Instructions"
569
- return gr.update(visible=new_visibility), new_visibility, gr.update(value=new_label)
570
 
571
- toggle_button.click(toggle_instructions, instructions_visible, [instructions, instructions_visible, toggle_button])
572
-
573
- with gr.Row(elem_classes="app-content"):
574
- with gr.Column(scale=1, elem_classes="card-section"):
575
- with gr.Row(elem_classes="search-section"):
576
- model_name = HuggingfaceHubSearch(
577
- label="🔍 Select Model",
578
- placeholder=" Search for model on Huggingface Hub...",
579
- search_type="model",
580
- elem_classes="search-box"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
581
  )
582
-
583
- with gr.Row(elem_classes="section-header"):
584
- gr.Markdown("### Quantization Settings")
585
-
586
- with gr.Column(elem_classes="settings-group"):
587
- gr.Markdown("**Quantization Type**", elem_classes="setting-label")
588
  quant_type_4 = gr.Dropdown(
 
589
  choices=["fp4", "nf4"],
590
- value="fp4",
591
- label="Format",
592
- info="The quantization data type in bnb.nn.Linear4Bit layers",
593
  show_label=False
594
  )
595
-
596
- gr.Markdown("**Compute Settings**", elem_classes="setting-label")
597
  compute_type_4 = gr.Dropdown(
 
598
  choices=["float16", "bfloat16", "float32"],
599
- value="float32",
600
- label="Compute Type",
601
- info="The compute dtype for matrix multiplication"
602
  )
603
-
604
  quant_storage_4 = gr.Dropdown(
 
605
  choices=["float16", "float32", "int8", "uint8", "bfloat16"],
606
  value="uint8",
607
- label="Storage Type",
608
- info="The storage type for quantized weights"
609
  )
610
-
611
- gr.Markdown("**Double Quantization**", elem_classes="setting-label")
612
- double_quant_4 = gr.Radio(
613
- ["False", "True"],
614
- label="Use Double Quantization",
615
- info="Further compress model size with nested quantization",
616
- value="False",
617
  )
618
-
619
- with gr.Row(elem_classes="section-header"):
620
- gr.Markdown("### Output Settings")
621
-
622
- with gr.Column(elem_classes="settings-group"):
623
- quantized_model_name = gr.Textbox(
624
- label="Custom Model Name (Optional)",
625
- info="Leave blank to use default naming convention",
626
- placeholder="my-quantized-model",
627
- elem_classes="model-name-textbox"
 
 
628
  )
 
 
 
 
 
 
 
 
 
629
 
630
- public = gr.Checkbox(
631
- label="Make model public",
632
- info="If checked, your model will be publicly accessible on Hugging Face Hub",
633
- value=False,
634
- )
 
 
 
 
 
 
 
635
 
636
- with gr.Column(scale=1, elem_classes="card-section"):
637
- with gr.Row():
638
- gr.Markdown("""
639
- ### 📊 Quantization Benefits
640
-
641
- <div style="background-color: rgba(99, 102, 241, 0.05); padding: 12px; border-radius: 8px; margin-bottom: 16px;">
642
- <p><strong>⚡ Lower Memory Usage:</strong> Reduce model size by up to 75%</p>
643
- <p><strong>🚀 Faster Inference:</strong> Achieve better performance on resource-constrained hardware</p>
644
- <p><strong>💻 Wider Compatibility:</strong> Run models on devices with limited VRAM</p>
645
- </div>
646
-
647
- ### 🔧 Configuration Guide
648
-
649
- <div style="background-color: rgba(16, 185, 129, 0.05); padding: 12px; border-radius: 8px;">
650
- <p><strong>Quantization Type:</strong></p>
651
- <ul>
652
- <li><code>fp4</code> - 4-bit floating point (better for most cases)</li>
653
- <li><code>nf4</code> - normalized float format (better for specific models)</li>
654
- </ul>
655
- <p><strong>Double Quantization:</strong> Enable for additional compression with minimal quality loss</p>
656
- </div>
657
- """)
658
-
659
- with gr.Row():
660
- quantize_button = gr.Button("🚀 Quantize Model", variant="primary", elem_id="quantize-button")
661
-
662
- output_link = gr.HTML(label="Results", elem_classes="results-container")
663
-
664
- # Add interactive footer with links
665
- gr.Markdown("""
666
- <div style="margin-top: 2rem; text-align: center; padding: 1rem; border-top: 1px solid rgba(99, 102, 241, 0.2);">
667
- <p>Powered by <a href="https://huggingface.co/" target="_blank" style="color: var(--primary); text-decoration: none; font-weight: 600;">Hugging Face</a> and <a href="https://github.com/TimDettmers/bitsandbytes" target="_blank" style="color: var(--primary); text-decoration: none; font-weight: 600;">BitsAndBytes</a></p>
668
- </div>
669
- """)
670
-
671
  quantize_button.click(
672
  fn=quantize_and_save,
673
  inputs=[model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, quantized_model_name, public],
674
- outputs=[output_link]
675
  )
676
 
677
  if __name__ == "__main__":
678
- demo.launch(share=True)
 
1
  import gradio as gr
2
  import torch
3
+ from transformers import AutoModel, BitsAndBytesConfig
4
  import tempfile
5
  from huggingface_hub import HfApi
6
  from huggingface_hub import list_models
 
8
  from bitsandbytes.nn import Linear4bit
9
  from packaging import version
10
  import os
11
+ from tqdm import tqdm
12
 
13
  def hello(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToken | None) -> str:
14
+ # ^ expect a gr.OAuthProfile object as input to get the user's profile
15
+ # if the user is not logged in, profile will be None
16
  if profile is None:
17
+ return "Hello Please Login to HuggingFace to use the BitsAndBytes Quantizer!"
18
+ return f"Hello {profile.name} ! Welcome to BitsAndBytes Quantizer"
19
 
20
  def check_model_exists(oauth_token: gr.OAuthToken | None, username, model_name, quantized_model_name):
21
  """Check if a model exists in the user's Hugging Face repository."""
 
25
  if quantized_model_name :
26
  repo_name = f"{username}/{quantized_model_name}"
27
  else :
28
+ repo_name = f"{username}/{model_name.split('/')[-1]}-bnb-4bit"
29
 
30
  if repo_name in model_names:
31
  return f"Model '{repo_name}' already exists in your repository."
 
61
 
62
  return model_card
63
 
 
 
 
64
  DTYPE_MAPPING = {
65
  "int8": torch.int8,
66
  "uint8": torch.uint8,
 
70
  }
71
 
72
 
73
+ def quantize_model(model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, auth_token=None, progress=gr.Progress()):
74
+
75
+ progress(0, desc="Starting")
76
  print(f"Quantizing model: {quant_type_4}")
77
  quantization_config = BitsAndBytesConfig(
78
  load_in_4bit=True,
 
81
  bnb_4bit_quant_storage=DTYPE_MAPPING[quant_storage_4],
82
  bnb_4bit_compute_dtype=DTYPE_MAPPING[compute_type_4],
83
  )
84
+ model = AutoModel.from_pretrained(model_name, quantization_config=quantization_config, device_map="cpu", use_auth_token=auth_token.token, torch_dtype=torch.bfloat16)
85
 
86
+ for _ , module in progress.tqdm(model.named_modules(), desc="Quantizing model", total=len(list(model.named_modules())), unit="layers"):
 
87
  if isinstance(module, Linear4bit):
88
  module.to("cuda")
89
  module.to("cpu")
 
92
  def save_model(model, model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, username=None, auth_token=None, quantized_model_name=None, public=False):
93
  print("Saving quantized model")
94
  with tempfile.TemporaryDirectory() as tmpdirname:
95
+
96
+
97
  model.save_pretrained(tmpdirname, safe_serialization=True, use_auth_token=auth_token.token)
98
  if quantized_model_name :
99
  repo_name = f"{username}/{quantized_model_name}"
100
  else :
101
+ repo_name = f"{username}/{model_name.split('/')[-1]}-bnb-4bit"
102
+
103
  model_card = create_model_card(repo_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4)
104
  with open(os.path.join(tmpdirname, "README.md"), "w") as f:
105
  f.write(model_card)
 
111
  repo_id=repo_name,
112
  repo_type="model",
113
  )
114
+ # Get model architecture as string
115
+ import io
116
+ from contextlib import redirect_stdout
117
+ import html
118
+
119
+ # Capture the model architecture string
120
+ f = io.StringIO()
121
+ with redirect_stdout(f):
122
+ print(model)
123
+ model_architecture_str = f.getvalue()
124
+
125
+ # Escape HTML characters and format with line breaks
126
+ model_architecture_str_html = html.escape(model_architecture_str).replace('\n', '<br/>')
127
+
128
+ # Format it for display in markdown with proper styling
129
+ model_architecture_info = f"""
130
+ <div class="model-architecture" style="max-height: 500px; overflow-y: auto; overflow-x: auto; background-color: #f5f5f5; padding: 5px; border-radius: 8px; font-family: monospace; white-space: pre-wrap;">
131
+ <div style="line-height: 1.2; font-size: 0.75em;">{model_architecture_str_html}</div>
132
  </div>
133
  """
134
+ return f'🔗 Quantized Model <br/><h1> 🤗 DONE</h1><br/>Find your repo here: <a href="https://huggingface.co/{repo_name}" target="_blank" style="text-decoration:underline">{repo_name}</a><br/><br/>📊 Model Architecture<br/>{model_architecture_info}'
135
 
136
  def quantize_and_save(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToken | None, model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, quantized_model_name, public):
137
  if oauth_token is None :
 
147
  <h3>❌ Authentication Error</h3>
148
  <p>Please sign in to your HuggingFace account to use the quantizer.</p>
149
  </div>
150
+ """
151
  exists_message = check_model_exists(oauth_token, profile.username, model_name, quantized_model_name)
152
  if exists_message :
153
  return f"""
 
157
  </div>
158
  """
159
  try:
160
+ # Download phase
161
  quantized_model = quantize_model(model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, oauth_token)
162
+ final_message = save_model(quantized_model, model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, profile.username, oauth_token, quantized_model_name, public)
163
+
164
+ return final_message
165
+
166
  except Exception as e :
167
+ error_message = str(e).replace('\n', '<br/>')
168
  return f"""
169
  <div class="error-box">
170
  <h3>❌ Error Occurred</h3>
171
+ <p>{error_message}</p>
172
  </div>
173
  """
174
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
 
176
+ css="""/* Custom CSS to allow scrolling */
177
+ .gradio-container {overflow-y: auto;}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
178
 
179
+ /* Fix alignment for radio buttons and checkboxes */
180
+ .gradio-radio {
181
+ display: flex !important;
182
+ align-items: center !important;
183
+ margin: 10px 0 !important;
 
 
 
 
 
 
 
 
 
 
 
 
184
  }
185
 
186
+ .gradio-checkbox {
187
+ display: flex !important;
188
+ align-items: center !important;
189
+ margin: 10px 0 !important;
 
 
 
 
190
  }
191
 
192
+ /* Ensure consistent spacing and alignment */
193
+ .gradio-dropdown, .gradio-textbox, .gradio-radio, .gradio-checkbox {
194
+ margin-bottom: 12px !important;
195
+ width: 100% !important;
 
 
 
196
  }
197
 
198
+ /* Align radio buttons and checkboxes horizontally */
199
+ .option-row {
200
+ display: flex !important;
201
+ justify-content: space-between !important;
202
+ align-items: center !important;
203
+ gap: 20px !important;
204
+ margin-bottom: 12px !important;
205
  }
206
 
207
+ .option-row .gradio-radio, .option-row .gradio-checkbox {
208
+ margin: 0 !important;
209
+ flex: 1 !important;
210
  }
211
 
212
+ /* Horizontally align radio button options with text */
213
+ .gradio-radio label {
214
+ display: flex !important;
215
+ align-items: center !important;
 
 
 
 
 
 
 
 
216
  }
217
 
218
+ .gradio-radio input[type="radio"] {
219
+ margin-right: 5px !important;
 
220
  }
221
 
222
+ /* Remove padding and margin from model name textbox for better alignment */
223
+ .model-name-textbox {
224
+ padding-left: 0 !important;
225
+ padding-right: 0 !important;
226
+ margin-left: 0 !important;
227
+ margin-right: 0 !important;
 
 
 
 
228
  }
229
 
230
+ /* Quantize button styling with glow effect */
231
+ button[variant="primary"] {
232
+ background: linear-gradient(135deg, #3B82F6, #10B981) !important;
233
  color: white !important;
234
+ padding: 16px 32px !important;
235
+ font-size: 1.1rem !important;
236
+ font-weight: 700 !important;
237
+ border: none !important;
238
+ border-radius: 12px !important;
239
+ box-shadow: 0 0 15px rgba(59, 130, 246, 0.5) !important;
240
+ transition: all 0.3s cubic-bezier(0.25, 0.8, 0.25, 1) !important;
241
+ position: relative;
242
+ overflow: hidden;
243
+ animation: glow 1.5s ease-in-out infinite alternate;
244
  }
245
 
246
+ button[variant="primary"]::before {
247
+ content: "✨ ";
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
248
  }
249
 
250
+ button[variant="primary"]:hover {
251
+ transform: translateY(-5px) scale(1.05) !important;
252
+ box-shadow: 0 10px 25px rgba(59, 130, 246, 0.7) !important;
253
  }
254
 
255
+ @keyframes glow {
256
+ from {
257
+ box-shadow: 0 0 10px rgba(59, 130, 246, 0.5);
258
+ }
259
+ to {
260
+ box-shadow: 0 0 20px rgba(59, 130, 246, 0.8), 0 0 30px rgba(16, 185, 129, 0.5);
261
+ }
262
  }
263
 
264
+ /* Login button styling with glow effect */
265
+ #login-button {
266
+ background: linear-gradient(135deg, #3B82F6, #10B981) !important;
267
  color: white !important;
268
+ font-weight: 700 !important;
269
+ border: none !important;
270
+ border-radius: 12px !important;
271
+ box-shadow: 0 0 15px rgba(59, 130, 246, 0.5) !important;
272
+ transition: all 0.3s cubic-bezier(0.25, 0.8, 0.25, 1) !important;
273
+ position: relative;
274
+ overflow: hidden;
275
+ animation: glow 1.5s ease-in-out infinite alternate;
276
+ max-width: 300px !important;
277
+ margin: 0 auto !important;
278
  }
279
 
280
+ #login-button::before {
281
+ content: "🔑 ";
282
+ display: inline-block !important;
283
+ vertical-align: middle !important;
284
+ margin-right: 5px !important;
285
+ line-height: normal !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
286
  }
287
 
288
+ #login-button:hover {
289
+ transform: translateY(-3px) scale(1.03) !important;
290
+ box-shadow: 0 10px 25px rgba(59, 130, 246, 0.7) !important;
291
  }
292
 
293
+ #login-button::after {
294
+ content: "";
295
+ position: absolute;
296
+ top: 0;
297
+ left: -100%;
298
+ width: 100%;
299
+ height: 100%;
300
+ background: linear-gradient(90deg, transparent, rgba(255, 255, 255, 0.2), transparent);
301
+ transition: 0.5s;
302
  }
303
 
304
+ #login-button:hover::after {
305
+ left: 100%;
 
306
  }
307
 
308
+ /* Toggle instructions button styling */
309
+ #toggle-button {
310
+ background: linear-gradient(135deg, #3B82F6, #10B981) !important;
 
311
  color: white !important;
312
+ font-size: 0.85rem !important;
 
 
313
  font-weight: 600 !important;
314
+ padding: 8px 16px !important;
315
+ border: none !important;
316
+ border-radius: 8px !important;
317
+ box-shadow: 0 2px 10px rgba(59, 130, 246, 0.3) !important;
318
+ transition: all 0.3s ease !important;
319
+ margin: 0.5rem auto 1.5rem auto !important;
320
+ display: block !important;
321
+ max-width: 200px !important;
322
+ text-align: center !important;
323
+ position: relative;
324
+ overflow: hidden;
325
  }
326
 
327
+ #toggle-button:hover {
328
  transform: translateY(-2px) !important;
329
+ box-shadow: 0 4px 12px rgba(59, 130, 246, 0.5) !important;
 
 
 
 
 
 
 
 
 
330
  }
331
 
332
+ #toggle-button::after {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
333
  content: "";
334
  position: absolute;
335
  top: 0;
336
+ left: -100%;
337
+ width: 100%;
338
+ height: 100%;
339
+ background: linear-gradient(90deg, transparent, rgba(255, 255, 255, 0.2), transparent);
340
+ transition: 0.5s;
 
 
 
 
 
 
 
 
 
341
  }
342
 
343
+ #toggle-button:hover::after {
344
+ left: 100%;
 
 
 
 
 
 
 
 
345
  }
346
+ /* Progress Bar Styles */
347
+ .progress-container {
348
+ font-family: system-ui, -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
349
+ padding: 20px;
350
+ background: white;
351
+ border-radius: 12px;
352
+ box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
353
  }
354
 
355
+ .progress-stage {
356
+ font-size: 0.9rem;
357
+ font-weight: 600;
358
+ color: #64748b;
359
  }
360
 
361
+ .progress-stage .stage {
 
 
 
 
 
 
 
 
 
 
362
  position: relative;
363
+ padding: 8px 12px;
364
+ border-radius: 6px;
365
+ background: #f1f5f9;
366
+ transition: all 0.3s ease;
367
  }
368
 
369
+ .progress-stage .stage.completed {
370
+ background: #ecfdf5;
 
371
  }
372
 
373
+ .progress-bar {
374
+ box-shadow: inset 0 2px 4px rgba(0, 0, 0, 0.1);
 
 
 
 
 
 
 
 
375
  }
376
+ .progress {
377
+ transition: width 0.8s cubic-bezier(0.4, 0, 0.2, 1);
378
+ box-shadow: 0 2px 4px rgba(59, 130, 246, 0.3);
379
  }
380
  """
381
 
382
+ def quantize_model_with_progress(model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, auth_token, progress=gr.Progress()):
383
+ """Quantize model with progress updates."""
384
+ progress(0, desc="Loading model")
385
+
386
+ # Configure quantization
387
+ quantization_config = BitsAndBytesConfig(
388
+ load_in_4bit=True,
389
+ bnb_4bit_quant_type=quant_type_4,
390
+ bnb_4bit_use_double_quant=True if double_quant_4 == "True" else False,
391
+ bnb_4bit_quant_storage=DTYPE_MAPPING[quant_storage_4],
392
+ bnb_4bit_compute_dtype=DTYPE_MAPPING[compute_type_4],
393
+ )
394
+
395
+ # Load model
396
+ model = AutoModel.from_pretrained(model_name, quantization_config=quantization_config, device_map="cpu", use_auth_token=auth_token.token, torch_dtype=torch.bfloat16)
397
+ progress(0.33, desc="Quantizing")
398
+
399
+ # Quantize model
400
+ modules = list(model.named_modules())
401
+ for idx, (_, module) in enumerate(modules):
402
+ if isinstance(module, Linear4bit):
403
+ module.to("cuda")
404
+ module.to("cpu")
405
+ progress(0.33 + (0.33 * idx / len(modules)), desc="Quantizing")
406
+
407
+ progress(0.66, desc="Quantized successfully")
408
+ return model
409
+
410
+ def save_model_with_progress(model, model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, username=None, auth_token=None, quantized_model_name=None, public=False, progress=gr.Progress()):
411
+ """Save model with progress updates."""
412
+ progress(0.67, desc="Preparing to push")
413
+
414
+ with tempfile.TemporaryDirectory() as tmpdirname:
415
+ # Save model
416
+ model.save_pretrained(tmpdirname, safe_serialization=True, use_auth_token=auth_token.token)
417
+ progress(0.75, desc="Preparing to push")
418
+
419
+ # Prepare repo name and model card
420
+ if quantized_model_name:
421
+ repo_name = f"{username}/{quantized_model_name}"
422
+ else:
423
+ repo_name = f"{username}/{model_name.split('/')[-1]}-bnb-4bit"
424
+
425
+ model_card = create_model_card(repo_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4)
426
+ with open(os.path.join(tmpdirname, "README.md"), "w") as f:
427
+ f.write(model_card)
428
+ progress(0.80, desc="Model card created")
429
+
430
+ # Push to Hub
431
+ api = HfApi(token=auth_token.token)
432
+ api.create_repo(repo_name, exist_ok=True, private=not public)
433
+ progress(0.85, desc="Pushing to Hub")
434
+
435
+ # Upload files
436
+ api.upload_folder(
437
+ folder_path=tmpdirname,
438
+ repo_id=repo_name,
439
+ repo_type="model",
440
  )
441
+ progress(1.00, desc="Model pushed to Hub")
442
+
443
+ # Get model architecture as string
444
+ import io
445
+ from contextlib import redirect_stdout
446
+ import html
447
+
448
+ # Capture the model architecture string
449
+ f = io.StringIO()
450
+ with redirect_stdout(f):
451
+ print(model)
452
+ model_architecture_str = f.getvalue()
453
+
454
+ # Escape HTML characters and format with line breaks
455
+ model_architecture_str_html = html.escape(model_architecture_str).replace('\n', '<br/>')
456
+
457
+ # Format it for display in markdown with proper styling
458
+ model_architecture_info = f"""
459
+ <div class="model-architecture" style="max-height: 500px; overflow-y: auto; overflow-x: auto; background-color: #f5f5f5; padding: 5px; border-radius: 8px; font-family: monospace; white-space: pre-wrap;">
460
+ <div style="line-height: 1.2; font-size: 0.75em;">{model_architecture_str_html}</div>
461
+ </div>
462
+ """
463
+ return f'🔗 Quantized Model <br/><h1> 🤗 DONE</h1><br/>Find your repo here: <a href="https://huggingface.co/{repo_name}" target="_blank" style="text-decoration:underline">{repo_name}</a><br/><br/>📊 Model Architecture<br/>{model_architecture_info}'
464
+
465
+ def quantize_and_save(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToken | None, model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, quantized_model_name, public, progress=gr.Progress()):
466
+ if oauth_token is None:
467
+ return """
468
+ <div class="error-box">
469
+ <h3>❌ Authentication Error</h3>
470
+ <p>Please sign in to your HuggingFace account to use the quantizer.</p>
471
+ </div>
472
+ """
473
+ if not profile:
474
+ return """
475
+ <div class="error-box">
476
+ <h3>❌ Authentication Error</h3>
477
+ <p>Please sign in to your HuggingFace account to use the quantizer.</p>
478
+ </div>
479
+ """
480
+ exists_message = check_model_exists(oauth_token, profile.username, model_name, quantized_model_name)
481
+ if exists_message:
482
+ return f"""
483
+ <div class="warning-box">
484
+ <h3>⚠️ Model Already Exists</h3>
485
+ <p>{exists_message}</p>
486
+ </div>
487
+ """
488
+ try:
489
+ # Download and quantize phase
490
+ progress(0, desc="Starting quantization process")
491
+ quantized_model = quantize_model_with_progress(model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, oauth_token, progress)
492
 
493
+ # Save and push phase
494
+ final_message = save_model_with_progress(quantized_model, model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, profile.username, oauth_token, quantized_model_name, public, progress)
495
 
496
+ return final_message
 
 
 
497
 
498
+ except Exception as e:
499
+ error_message = str(e).replace('\n', '<br/>')
500
+ return f"""
501
+ <div class="error-box">
502
+ <h3>❌ Error Occurred</h3>
503
+ <p>{error_message}</p>
504
+ </div>
505
+ """
506
+
507
+ with gr.Blocks(theme=gr.themes.Ocean(), css=css) as demo:
508
+ gr.Markdown(
509
+ """
510
+ # 🤗 LLM Model BitsAndBytes Quantizer ✨
511
+
512
+ """
513
+ )
514
+
515
+ gr.LoginButton(elem_id="login-button", elem_classes="center-button", min_width=250)
516
+
517
+ m1 = gr.Markdown()
518
+ demo.load(hello, inputs=None, outputs=m1)
519
+
520
+ instructions_visible = gr.State(False)
521
+
522
+ with gr.Row():
523
+ with gr.Column():
524
+ with gr.Row():
525
+ model_name = HuggingfaceHubSearch(
526
+ label="🔍 Hub Model ID",
527
+ placeholder="Search for model id on Huggingface",
528
+ search_type="model",
529
+ )
530
+ with gr.Row():
531
+ with gr.Column():
532
+ gr.Markdown(
533
+ """
534
+ ### ⚙️ Model Quantization Type Settings
535
+ """
536
  )
 
 
 
 
 
 
537
  quant_type_4 = gr.Dropdown(
538
+ info="The quantization data type in the bnb.nn.Linear4Bit layers",
539
  choices=["fp4", "nf4"],
540
+ value="nf4",
541
+ visible=True,
 
542
  show_label=False
543
  )
 
 
544
  compute_type_4 = gr.Dropdown(
545
+ info="The compute type for the model",
546
  choices=["float16", "bfloat16", "float32"],
547
+ value="bfloat16",
548
+ visible=True,
549
+ show_label=False
550
  )
 
551
  quant_storage_4 = gr.Dropdown(
552
+ info="The storage type for the model",
553
  choices=["float16", "float32", "int8", "uint8", "bfloat16"],
554
  value="uint8",
555
+ visible=True,
556
+ show_label=False
557
  )
558
+ gr.Markdown(
559
+ """
560
+ ### 🔄 Double Quantization Settings
561
+ """
 
 
 
562
  )
563
+ with gr.Row(elem_classes="option-row"):
564
+ double_quant_4 = gr.Radio(
565
+ ["True", "False"],
566
+ info="Use Double Quant",
567
+ visible=True,
568
+ value="True",
569
+ show_label=False
570
+ )
571
+ gr.Markdown(
572
+ """
573
+ ### 💾 Saving Settings
574
+ """
575
  )
576
+ with gr.Row():
577
+ quantized_model_name = gr.Textbox(
578
+ label="✏️ Model Name",
579
+ info="Model Name (optional : to override default)",
580
+ value="",
581
+ interactive=True,
582
+ elem_classes="model-name-textbox",
583
+ show_label=False,
584
+ )
585
 
586
+ with gr.Row():
587
+ public = gr.Checkbox(
588
+ label="🌐 Make model public",
589
+ info="If checked, the model will be publicly accessible",
590
+ value=True,
591
+ interactive=True,
592
+ show_label=True
593
+ )
594
+
595
+ with gr.Column():
596
+ quantize_button = gr.Button("🚀 Quantize and Push to the Hub", variant="primary")
597
+ output_link = gr.Markdown("🔗 Quantized Model", container=True, min_height=100)
598
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
599
  quantize_button.click(
600
  fn=quantize_and_save,
601
  inputs=[model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, quantized_model_name, public],
602
+ outputs=[output_link],
603
  )
604
 
605
  if __name__ == "__main__":
606
+ demo.launch(share=True)