MekkCyber committed on
Commit
fa23c0d
·
1 Parent(s): 40a26a8
Files changed (3) hide show
  1. app.py +275 -135
  2. app_claude.py +678 -0
  3. requirements.txt +1 -1
app.py CHANGED
@@ -5,6 +5,7 @@ import tempfile
5
  from huggingface_hub import HfApi
6
  from huggingface_hub import list_models
7
  from gradio_huggingfacehub_search import HuggingfaceHubSearch
 
8
  from packaging import version
9
  import os
10
 
@@ -13,10 +14,10 @@ def hello(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToken | None) ->
13
  # ^ expect a gr.OAuthProfile object as input to get the user's profile
14
  # if the user is not logged in, profile will be None
15
  if profile is None:
16
- return "Hello !"
17
- return f"Hello {profile.name} ! Welcome to BitsAndBytes Space"
18
 
19
- def check_model_exists(oauth_token: gr.OAuthToken | None, username, quantization_type, model_name, quantized_model_name):
20
  """Check if a model exists in the user's Hugging Face repository."""
21
  try:
22
  models = list_models(author=username, token=oauth_token.token)
@@ -24,7 +25,7 @@ def check_model_exists(oauth_token: gr.OAuthToken | None, username, quantization
24
  if quantized_model_name :
25
  repo_name = f"{username}/{quantized_model_name}"
26
  else :
27
- repo_name = f"{username}/{model_name.split('/')[-1]}-BNB-{quantization_type}"
28
 
29
  if repo_name in model_names:
30
  return f"Model '{repo_name}' already exists in your repository."
@@ -33,7 +34,7 @@ def check_model_exists(oauth_token: gr.OAuthToken | None, username, quantization
33
  except Exception as e:
34
  return f"Error checking model existence: {str(e)}"
35
 
36
- def create_model_card(model_name, quantization_type, threshold, quant_type_4, double_quant_4,):
37
  model_card = f"""---
38
  base_model:
39
  - {model_name}
@@ -42,17 +43,17 @@ base_model:
42
  # {model_name} (Quantized)
43
 
44
  ## Description
45
- This model is a quantized version of the original model `{model_name}`. It has been quantized using {quantization_type} quantization with bitsandbytes.
46
 
47
  ## Quantization Details
48
- - **Quantization Type**: {quantization_type}
49
- - **Threshold**: {threshold if quantization_type == "int8" else None}
50
- - **bnb_4bit_quant_type**: {quant_type_4 if quantization_type == "int4" else None}
51
- - **bnb_4bit_use_double_quant**: {double_quant_4 if quantization_type=="int4" else None}
 
52
 
53
  ## Usage
54
  You can use this model in your applications by loading it directly from the Hugging Face Hub:
55
-
56
  ```python
57
  from transformers import AutoModel
58
 
@@ -63,24 +64,33 @@ model = AutoModel.from_pretrained("{model_name}")"""
63
  def load_model(model_name, quantization_config, auth_token) :
64
  return AutoModel.from_pretrained(model_name, quantization_config=quantization_config, device_map="cpu", use_auth_token=auth_token.token)
65
 
66
- def quantize_model(model_name, quantization_type, threshold, quant_type_4, double_quant_4, auth_token=None, username=None):
67
- print(f"Quantizing model: {quantization_type}")
68
- if quantization_type=="int4":
69
- quantization_config = BitsAndBytesConfig(
70
- load_in_4bit=True,
71
- bnb_4bit_quant_type=quant_type_4,
72
- bnb_4bit_use_double_quant=True if double_quant_4 == "True" else False,
73
- )
74
- else :
75
- quantization_config = BitsAndBytesConfig(
76
- load_in_8bit=True,
77
- llm_int8_threshold=threshold,
78
- )
79
- model = load_model(model_name, quantization_config=quantization_config, auth_token=auth_token)
80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  return model
82
 
83
- def save_model(model, model_name, quantization_type, threshold, quant_type_4, double_quant_4, username=None, auth_token=None, quantized_model_name=None):
84
  print("Saving quantized model")
85
  with tempfile.TemporaryDirectory() as tmpdirname:
86
 
@@ -89,15 +99,15 @@ def save_model(model, model_name, quantization_type, threshold, quant_type_4, do
89
  if quantized_model_name :
90
  repo_name = f"{username}/{quantized_model_name}"
91
  else :
92
- repo_name = f"{username}/{model_name.split('/')[-1]}-BNB-{quantization_type}"
93
 
94
 
95
- model_card = create_model_card(repo_name, quantization_type, threshold, quant_type_4, double_quant_4)
96
  with open(os.path.join(tmpdirname, "README.md"), "w") as f:
97
  f.write(model_card)
98
  # Push to Hub
99
  api = HfApi(token=auth_token.token)
100
- api.create_repo(repo_name, exist_ok=True)
101
  api.upload_folder(
102
  folder_path=tmpdirname,
103
  repo_id=repo_name,
@@ -105,30 +115,17 @@ def save_model(model, model_name, quantization_type, threshold, quant_type_4, do
105
  )
106
  return f'<h1> 🤗 DONE</h1><br/>Find your repo here: <a href="https://huggingface.co/{repo_name}" target="_blank" style="text-decoration:underline">{repo_name}</a>'
107
 
108
- def is_float(value):
109
- try:
110
- float(value)
111
- return True
112
- except ValueError:
113
- return False
114
-
115
- def quantize_and_save(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToken | None, model_name, quantization_type, threshold, quant_type_4, double_quant_4, quantized_model_name):
116
  if oauth_token is None :
117
  return "Error : Please Sign In to your HuggingFace account to use the quantizer"
118
  if not profile:
119
  return "Error: Please Sign In to your HuggingFace account to use the quantizer"
120
- exists_message = check_model_exists(oauth_token, profile.username, quantization_type, model_name, quantized_model_name)
121
  if exists_message :
122
  return exists_message
123
-
124
- if not is_float(threshold) :
125
- return "Threshold must be a float"
126
-
127
- threshold = float(threshold)
128
-
129
  # try:
130
- quantized_model = quantize_model(model_name, quantization_type, threshold, quant_type_4, double_quant_4, oauth_token, profile.username)
131
- return save_model(quantized_model, model_name, quantization_type, threshold, quant_type_4, double_quant_4, profile.username, oauth_token, quantized_model_name)
132
  # except Exception as e :
133
  # print(e)
134
  # return f"An error occurred: {str(e)}"
@@ -136,16 +133,183 @@ def quantize_and_save(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToke
136
 
137
  css="""/* Custom CSS to allow scrolling */
138
  .gradio-container {overflow-y: auto;}
139
- .custom-radio {
140
- margin-left: 20px; /* Adjust the value as needed */
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
  }
 
142
  """
 
 
143
  with gr.Blocks(theme=gr.themes.Ocean(), css=css) as demo:
144
  gr.Markdown(
145
  """
146
- # 🤗 LLM Model BitsAndBytes Quantization App
147
 
148
- Quantize your favorite Hugging Face models using BitsAndBytes and save them to your profile!
149
  """
150
  )
151
 
@@ -153,117 +317,93 @@ with gr.Blocks(theme=gr.themes.Ocean(), css=css) as demo:
153
 
154
  m1 = gr.Markdown()
155
  demo.load(hello, inputs=None, outputs=m1)
156
-
157
-
158
- # radio = gr.Radio(["show", "hide"], label="Show Instructions")
159
- instructions = gr.Markdown(
160
- """
161
- ## Instructions
162
-
163
- 1. Login to your HuggingFace account
164
- 2. Enter the name of the Hugging Face LLM model you want to quantize (Make sure you have access to it)
165
- 3. Choose the quantization type.
166
- 4. Optionally, specify the group size.
167
- 5. Optionally, choose a custom name for the quantized model
168
- 6. Click "Quantize and Save Model" to start the process.
169
- 7. Once complete, you'll receive a link to the quantized model on Hugging Face.
170
-
171
- Note: This process may take some time depending on the model size and your hardware you can check the container logs to see where are you at in the process!
172
- """,
173
- visible=False
174
- )
175
-
176
- instructions_visible = gr.State(False)
177
- toggle_button = gr.Button("▼ Show Instructions", elem_id="toggle-button", elem_classes="toggle-button")
178
-
179
- def toggle_instructions(instructions_visible):
180
- new_visibility = not instructions_visible # Toggle the state
181
- new_label = "▲ Hide Instructions" if new_visibility else "▼ Show Instructions" # Change label based on visibility
182
- return gr.update(visible=new_visibility), new_visibility, gr.update(value=new_label) # Toggle visibility and return new state
183
 
184
- toggle_button.click(toggle_instructions, instructions_visible, [instructions, instructions_visible, toggle_button])
185
-
186
- # def update_visibility(radio): # Accept the event argument, even if not used
187
- # value = radio # Get the selected value from the radio button
188
- # if value == "show":
189
- # return gr.Textbox(visible=True) #make it visible
190
- # else:
191
- # return gr.Textbox(visible=False)
192
- # radio.change(update_visibility, radio, instructions)
193
 
194
  with gr.Row():
195
  with gr.Column():
196
  with gr.Row():
197
  model_name = HuggingfaceHubSearch(
198
- label="Hub Model ID",
199
  placeholder="Search for model id on Huggingface",
200
  search_type="model",
201
  )
202
  with gr.Row():
203
- with gr.Column():
204
- quantization_type = gr.Dropdown(
205
- info="Quantization Type",
206
- choices=["int4", "int8"],
207
- value="int8",
208
- filterable=False,
209
- show_label=False,
210
- )
211
- threshold_8 = gr.Textbox(
212
- info="Outlier threshold",
213
- value=6,
214
- interactive=True,
215
- show_label=False,
216
- visible=True
217
  )
218
  quant_type_4 = gr.Dropdown(
219
  info="The quantization data type in the bnb.nn.Linear4Bit layers",
220
  choices=["fp4", "nf4"],
221
  value="fp4",
222
- visible=False,
223
  show_label=False
224
  )
225
- radio_4 = gr.Radio(["False", "True"], info="Use Double Quant", visible=False, value="False", elem_classes="custom_radio")
226
-
227
- def update_visibility(quantization_type):
228
- return gr.update(visible=(quantization_type=="int8")), gr.update(visible=(quantization_type=="int4")), gr.update(visible=(quantization_type=="int4"))
229
-
230
- quantization_type.change(fn=update_visibility, inputs=quantization_type, outputs=[threshold_8, quant_type_4, radio_4])
231
-
232
- quantized_model_name = gr.Textbox(
233
- info="Model Name (optional : to override default)",
234
- value="",
235
- interactive=True,
236
  show_label=False
237
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
238
  with gr.Column():
239
- quantize_button = gr.Button("Quantize and Save Model", variant="primary")
240
- output_link = gr.Markdown(label="Quantized Model Link", container=True, min_height=80)
241
-
242
-
243
- # Adding CSS styles for the username box
244
- demo.css = """
245
- #username-box {
246
- background-color: #f0f8ff; /* Light color */
247
- border-radius: 8px;
248
- padding: 10px;
249
- }
250
- """
251
- demo.css = """
252
- .center-button {
253
- display: flex;
254
- justify-content: center;
255
- align-items: center;
256
- margin: 0 auto; /* Center horizontally */
257
- }
258
- """
259
 
260
  quantize_button.click(
261
  fn=quantize_and_save,
262
- inputs=[model_name, quantization_type, threshold_8, quant_type_4, radio_4, quantized_model_name],
263
  outputs=[output_link]
264
  )
265
 
266
  if __name__ == "__main__":
267
  demo.launch(share=True)
268
  # Launch the app
269
- # demo.launch(share=True, debug=True)
 
5
  from huggingface_hub import HfApi
6
  from huggingface_hub import list_models
7
  from gradio_huggingfacehub_search import HuggingfaceHubSearch
8
+ from bitsandbytes.nn import Linear4bit
9
  from packaging import version
10
  import os
11
 
 
14
  # ^ expect a gr.OAuthProfile object as input to get the user's profile
15
  # if the user is not logged in, profile will be None
16
  if profile is None:
17
+ return "Hello Please Login to HuggingFace to use the BitsAndBytes Quantizer!"
18
+ return f"Hello {profile.name} ! Welcome to BitsAndBytes Quantizer"
19
 
20
+ def check_model_exists(oauth_token: gr.OAuthToken | None, username, model_name, quantized_model_name):
21
  """Check if a model exists in the user's Hugging Face repository."""
22
  try:
23
  models = list_models(author=username, token=oauth_token.token)
 
25
  if quantized_model_name :
26
  repo_name = f"{username}/{quantized_model_name}"
27
  else :
28
+ repo_name = f"{username}/{model_name.split('/')[-1]}-BNB-INT4"
29
 
30
  if repo_name in model_names:
31
  return f"Model '{repo_name}' already exists in your repository."
 
34
  except Exception as e:
35
  return f"Error checking model existence: {str(e)}"
36
 
37
+ def create_model_card(model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4):
38
  model_card = f"""---
39
  base_model:
40
  - {model_name}
 
43
  # {model_name} (Quantized)
44
 
45
  ## Description
46
+ This model is a quantized version of the original model `{model_name}`. It has been quantized using int4 quantization with bitsandbytes.
47
 
48
  ## Quantization Details
49
+ - **Quantization Type**: int4
50
+ - **bnb_4bit_quant_type**: {quant_type_4}
51
+ - **bnb_4bit_use_double_quant**: {double_quant_4}
52
+ - **bnb_4bit_compute_dtype**: {compute_type_4}
53
+ - **bnb_4bit_quant_storage**: {quant_storage_4}
54
 
55
  ## Usage
56
  You can use this model in your applications by loading it directly from the Hugging Face Hub:
 
57
  ```python
58
  from transformers import AutoModel
59
 
 
64
def load_model(model_name, quantization_config, auth_token):
    """Load `model_name` through AutoModel on CPU with a quantization config.

    Args:
        model_name: Hub model id handed to `AutoModel.from_pretrained`.
        quantization_config: Forwarded unchanged as `quantization_config`.
        auth_token: Object exposing a `.token` attribute, or None for
            anonymous access.

    Returns:
        Whatever `AutoModel.from_pretrained` returns.
    """
    # `use_auth_token` is deprecated in transformers; `token` is its replacement.
    hub_token = auth_token.token if auth_token is not None else None
    return AutoModel.from_pretrained(
        model_name,
        quantization_config=quantization_config,
        device_map="cpu",
        token=hub_token,
    )
66
 
67
# Lookup from a dtype name (string) to the torch dtype object of the same name.
# quantize_model indexes it with the storage and compute dtype names.
DTYPE_MAPPING = {
    dtype_name: getattr(torch, dtype_name)
    for dtype_name in ("int8", "uint8", "float16", "float32", "bfloat16")
}
74
+
 
 
 
 
 
 
75
 
76
def quantize_model(model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, auth_token=None):
    """Load `model_name` with a 4-bit BitsAndBytesConfig.

    Args:
        model_name: Hub model id handed to `AutoModel.from_pretrained`.
        quant_type_4: Value passed as `bnb_4bit_quant_type`.
        double_quant_4: Double quantization is enabled when this is the
            string "True" (or the boolean True).
        compute_type_4: Key into DTYPE_MAPPING for `bnb_4bit_compute_dtype`.
        quant_storage_4: Key into DTYPE_MAPPING for `bnb_4bit_quant_storage`.
        auth_token: Object exposing `.token`, or None for anonymous access.

    Returns:
        The loaded model, after every Linear4bit module has been moved to
        CUDA and back to CPU.

    Raises:
        KeyError: If a dtype name is not present in DTYPE_MAPPING.
        RuntimeError: If the model contains Linear4bit modules but CUDA is
            not available.
    """
    print(f"Quantizing model: {quant_type_4}")
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type=quant_type_4,
        bnb_4bit_use_double_quant=str(double_quant_4) == "True",
        bnb_4bit_quant_storage=DTYPE_MAPPING[quant_storage_4],
        bnb_4bit_compute_dtype=DTYPE_MAPPING[compute_type_4],
    )

    # `use_auth_token` is deprecated in transformers; `token` is its replacement.
    hub_token = auth_token.token if auth_token is not None else None
    model = AutoModel.from_pretrained(
        model_name,
        quantization_config=quantization_config,
        device_map="cpu",
        token=hub_token,
    )

    linear_4bit_modules = [m for m in model.modules() if isinstance(m, Linear4bit)]
    if linear_4bit_modules and not torch.cuda.is_available():
        # Fail with an actionable message instead of the low-level error
        # that `.to("cuda")` would raise on a CPU-only host.
        raise RuntimeError(
            "A CUDA device is required to quantize the model with bitsandbytes, "
            "but none is available."
        )

    # NOTE(review): presumably the round trip through CUDA is what makes
    # bitsandbytes pack the weights - confirm against the bitsandbytes docs.
    for module in linear_4bit_modules:
        module.to("cuda")
        module.to("cpu")
    return model
92
 
93
+ def save_model(model, model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, username=None, auth_token=None, quantized_model_name=None, public=False):
94
  print("Saving quantized model")
95
  with tempfile.TemporaryDirectory() as tmpdirname:
96
 
 
99
  if quantized_model_name :
100
  repo_name = f"{username}/{quantized_model_name}"
101
  else :
102
+ repo_name = f"{username}/{model_name.split('/')[-1]}-BNB-INT4"
103
 
104
 
105
+ model_card = create_model_card(repo_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4)
106
  with open(os.path.join(tmpdirname, "README.md"), "w") as f:
107
  f.write(model_card)
108
  # Push to Hub
109
  api = HfApi(token=auth_token.token)
110
+ api.create_repo(repo_name, exist_ok=True, private=not public)
111
  api.upload_folder(
112
  folder_path=tmpdirname,
113
  repo_id=repo_name,
 
115
  )
116
  return f'<h1> 🤗 DONE</h1><br/>Find your repo here: <a href="https://huggingface.co/{repo_name}" target="_blank" style="text-decoration:underline">{repo_name}</a>'
117
 
118
def quantize_and_save(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToken | None, model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, quantized_model_name, public):
    """Quantize a model and upload it; entry point wired to the button click.

    Returns a message string in every case: a sign-in error when
    `oauth_token` or `profile` is missing, the message produced by
    `check_model_exists` when it returns one, the result of `save_model` on
    success, or an "An error occurred" message when quantizing or saving
    raises.
    """
    import traceback  # function-scope import keeps this block self-contained

    if oauth_token is None:
        return "Error : Please Sign In to your HuggingFace account to use the quantizer"
    if not profile:
        return "Error: Please Sign In to your HuggingFace account to use the quantizer"

    exists_message = check_model_exists(oauth_token, profile.username, model_name, quantized_model_name)
    if exists_message:
        return exists_message

    try:
        quantized_model = quantize_model(model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, oauth_token)
        return save_model(quantized_model, model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, profile.username, oauth_token, quantized_model_name, public)
    except Exception as e:
        # Keep the full stack trace in the logs while reporting the failure
        # in the output panel instead of letting the exception propagate.
        traceback.print_exc()
        return f"An error occurred: {str(e)}"
 
133
 
134
  css="""/* Custom CSS to allow scrolling */
135
  .gradio-container {overflow-y: auto;}
136
+
137
+ /* Fix alignment for radio buttons and checkboxes */
138
+ .gradio-radio {
139
+ display: flex !important;
140
+ align-items: center !important;
141
+ margin: 10px 0 !important;
142
+ }
143
+
144
+ .gradio-checkbox {
145
+ display: flex !important;
146
+ align-items: center !important;
147
+ margin: 10px 0 !important;
148
+ }
149
+
150
+ /* Ensure consistent spacing and alignment */
151
+ .gradio-dropdown, .gradio-textbox, .gradio-radio, .gradio-checkbox {
152
+ margin-bottom: 12px !important;
153
+ width: 100% !important;
154
+ }
155
+
156
+ /* Align radio buttons and checkboxes horizontally */
157
+ .option-row {
158
+ display: flex !important;
159
+ justify-content: space-between !important;
160
+ align-items: center !important;
161
+ gap: 20px !important;
162
+ margin-bottom: 12px !important;
163
+ }
164
+
165
+ .option-row .gradio-radio, .option-row .gradio-checkbox {
166
+ margin: 0 !important;
167
+ flex: 1 !important;
168
+ }
169
+
170
+ /* Horizontally align radio button options with text */
171
+ .gradio-radio label {
172
+ display: flex !important;
173
+ align-items: center !important;
174
+ }
175
+
176
+ .gradio-radio input[type="radio"] {
177
+ margin-right: 5px !important;
178
+ }
179
+
180
+ /* Remove padding and margin from model name textbox for better alignment */
181
+ .model-name-textbox {
182
+ padding-left: 0 !important;
183
+ padding-right: 0 !important;
184
+ margin-left: 0 !important;
185
+ margin-right: 0 !important;
186
+ }
187
+
188
+ /* Quantize button styling with glow effect */
189
+ button[variant="primary"] {
190
+ background: linear-gradient(135deg, #3B82F6, #10B981) !important;
191
+ color: white !important;
192
+ padding: 16px 32px !important;
193
+ font-size: 1.1rem !important;
194
+ font-weight: 700 !important;
195
+ border: none !important;
196
+ border-radius: 12px !important;
197
+ box-shadow: 0 0 15px rgba(59, 130, 246, 0.5) !important;
198
+ transition: all 0.3s cubic-bezier(0.25, 0.8, 0.25, 1) !important;
199
+ position: relative;
200
+ overflow: hidden;
201
+ animation: glow 1.5s ease-in-out infinite alternate;
202
+ }
203
+
204
+ button[variant="primary"]::before {
205
+ content: "✨ ";
206
+ }
207
+
208
+ button[variant="primary"]:hover {
209
+ transform: translateY(-5px) scale(1.05) !important;
210
+ box-shadow: 0 10px 25px rgba(59, 130, 246, 0.7) !important;
211
+ }
212
+
213
+ @keyframes glow {
214
+ from {
215
+ box-shadow: 0 0 10px rgba(59, 130, 246, 0.5);
216
+ }
217
+ to {
218
+ box-shadow: 0 0 20px rgba(59, 130, 246, 0.8), 0 0 30px rgba(16, 185, 129, 0.5);
219
+ }
220
+ }
221
+
222
+ /* Login button styling with glow effect */
223
+ #login-button {
224
+ background: linear-gradient(135deg, #3B82F6, #10B981) !important;
225
+ color: white !important;
226
+ font-weight: 700 !important;
227
+ border: none !important;
228
+ border-radius: 12px !important;
229
+ box-shadow: 0 0 15px rgba(59, 130, 246, 0.5) !important;
230
+ transition: all 0.3s cubic-bezier(0.25, 0.8, 0.25, 1) !important;
231
+ position: relative;
232
+ overflow: hidden;
233
+ animation: glow 1.5s ease-in-out infinite alternate;
234
+ max-width: 300px !important;
235
+ margin: 0 auto !important;
236
+ }
237
+
238
+ #login-button::before {
239
+ content: "🔑 ";
240
+ display: inline-block !important;
241
+ vertical-align: middle !important;
242
+ margin-right: 5px !important;
243
+ line-height: normal !important;
244
+ }
245
+
246
+ #login-button:hover {
247
+ transform: translateY(-3px) scale(1.03) !important;
248
+ box-shadow: 0 10px 25px rgba(59, 130, 246, 0.7) !important;
249
+ }
250
+
251
+ #login-button::after {
252
+ content: "";
253
+ position: absolute;
254
+ top: 0;
255
+ left: -100%;
256
+ width: 100%;
257
+ height: 100%;
258
+ background: linear-gradient(90deg, transparent, rgba(255, 255, 255, 0.2), transparent);
259
+ transition: 0.5s;
260
+ }
261
+
262
+ #login-button:hover::after {
263
+ left: 100%;
264
+ }
265
+
266
+ /* Toggle instructions button styling */
267
+ #toggle-button {
268
+ background: linear-gradient(135deg, #3B82F6, #10B981) !important;
269
+ color: white !important;
270
+ font-size: 0.85rem !important;
271
+ font-weight: 600 !important;
272
+ padding: 8px 16px !important;
273
+ border: none !important;
274
+ border-radius: 8px !important;
275
+ box-shadow: 0 2px 10px rgba(59, 130, 246, 0.3) !important;
276
+ transition: all 0.3s ease !important;
277
+ margin: 0.5rem auto 1.5rem auto !important;
278
+ display: block !important;
279
+ max-width: 200px !important;
280
+ text-align: center !important;
281
+ position: relative;
282
+ overflow: hidden;
283
+ }
284
+
285
+ #toggle-button:hover {
286
+ transform: translateY(-2px) !important;
287
+ box-shadow: 0 4px 12px rgba(59, 130, 246, 0.5) !important;
288
+ }
289
+
290
+ #toggle-button::after {
291
+ content: "";
292
+ position: absolute;
293
+ top: 0;
294
+ left: -100%;
295
+ width: 100%;
296
+ height: 100%;
297
+ background: linear-gradient(90deg, transparent, rgba(255, 255, 255, 0.2), transparent);
298
+ transition: 0.5s;
299
+ }
300
+
301
+ #toggle-button:hover::after {
302
+ left: 100%;
303
  }
304
+
305
  """
306
+
307
+
308
  with gr.Blocks(theme=gr.themes.Ocean(), css=css) as demo:
309
  gr.Markdown(
310
  """
311
+ # 🤗 LLM Model BitsAndBytes Quantizer
312
 
 
313
  """
314
  )
315
 
 
317
 
318
  m1 = gr.Markdown()
319
  demo.load(hello, inputs=None, outputs=m1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
320
 
321
+ instructions_visible = gr.State(False)
 
 
 
 
 
 
 
 
322
 
323
  with gr.Row():
324
  with gr.Column():
325
  with gr.Row():
326
  model_name = HuggingfaceHubSearch(
327
+ label="🔍 Hub Model ID",
328
  placeholder="Search for model id on Huggingface",
329
  search_type="model",
330
  )
331
  with gr.Row():
332
+ with gr.Column():
333
+ gr.Markdown(
334
+ """
335
+ ### ⚙️ Model Quantization Type Settings
336
+ """
 
 
 
 
 
 
 
 
 
337
  )
338
  quant_type_4 = gr.Dropdown(
339
  info="The quantization data type in the bnb.nn.Linear4Bit layers",
340
  choices=["fp4", "nf4"],
341
  value="fp4",
342
+ visible=True,
343
  show_label=False
344
  )
345
+ compute_type_4 = gr.Dropdown(
346
+ info="The compute type for the model",
347
+ choices=["float16", "bfloat16", "float32"],
348
+ value="float32",
349
+ visible=True,
 
 
 
 
 
 
350
  show_label=False
351
  )
352
+ quant_storage_4 = gr.Dropdown(
353
+ info="The storage type for the model",
354
+ choices=["float16", "float32", "int8", "uint8", "bfloat16"],
355
+ value="uint8",
356
+ visible=True,
357
+ show_label=False
358
+ )
359
+ gr.Markdown(
360
+ """
361
+ ### 🔄 Double Quantization Settings
362
+ """
363
+ )
364
+ with gr.Row(elem_classes="option-row"):
365
+ double_quant_4 = gr.Radio(
366
+ ["False", "True"],
367
+ info="Use Double Quant",
368
+ visible=True,
369
+ value="False",
370
+ show_label=False
371
+ )
372
+ gr.Markdown(
373
+ """
374
+ ### 💾 Saving Settings
375
+ """
376
+ )
377
+ with gr.Row():
378
+ quantized_model_name = gr.Textbox(
379
+ label="✏️ Model Name",
380
+ info="Model Name (optional : to override default)",
381
+ value="",
382
+ interactive=True,
383
+ elem_classes="model-name-textbox",
384
+ show_label=False,
385
+ )
386
+
387
+ with gr.Row():
388
+ public = gr.Checkbox(
389
+ label="🌐 Make model public",
390
+ info="If checked, the model will be publicly accessible",
391
+ value=False,
392
+ interactive=True,
393
+ show_label=True
394
+ )
395
+
396
  with gr.Column():
397
+ quantize_button = gr.Button("🚀 Quantize and Save Model", variant="primary")
398
+ output_link = gr.Markdown(label="🔗 Quantized Model Link", container=True, min_height=80)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
399
 
400
  quantize_button.click(
401
  fn=quantize_and_save,
402
+ inputs=[model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, quantized_model_name, public],
403
  outputs=[output_link]
404
  )
405
 
406
  if __name__ == "__main__":
407
  demo.launch(share=True)
408
  # Launch the app
409
+ # demo.launch(share=True, debug=True)
app_claude.py ADDED
@@ -0,0 +1,678 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModel, BitsAndBytesConfig
4
+ import tempfile
5
+ from huggingface_hub import HfApi
6
+ from huggingface_hub import list_models
7
+ from gradio_huggingfacehub_search import HuggingfaceHubSearch
8
+ from bitsandbytes.nn import Linear4bit
9
+ from packaging import version
10
+ import os
11
+
12
+
13
def hello(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToken | None) -> str:
    """Build the greeting text.

    Returns a sign-in prompt when `profile` is None, otherwise a welcome
    message containing `profile.name`. `oauth_token` is accepted but not read.
    """
    if profile is not None:
        return f"👋 Hello {profile.name}! Welcome to the BitsAndBytes Quantizer."
    return "👋 Hello! Sign in to get started with the BitsAndBytes Quantizer."
17
+
18
def check_model_exists(oauth_token: gr.OAuthToken | None, username, model_name, quantized_model_name):
    """Report whether the target repository is already listed under `username`.

    Returns a message string when the repository is found or when the lookup
    raises, and None when the repository is not found.
    """
    try:
        existing_ids = [entry.id for entry in list_models(author=username, token=oauth_token.token)]
        # An explicit name wins; otherwise derive one from the source model id.
        target_suffix = quantized_model_name or f"{model_name.split('/')[-1]}-BNB-INT4"
        repo_name = f"{username}/{target_suffix}"

        if repo_name not in existing_ids:
            return None  # Model does not exist
        return f"Model '{repo_name}' already exists in your repository."
    except Exception as e:
        return f"Error checking model existence: {str(e)}"
34
+
35
def create_model_card(model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4):
    """Render the README.md (model card) text for a 4-bit quantized model.

    Args:
        model_name: Repository id interpolated into the `base_model`
            metadata, the title, the description and the loading example.
        quant_type_4: Reported as `bnb_4bit_quant_type`.
        double_quant_4: Reported as `bnb_4bit_use_double_quant`.
        compute_type_4: Reported as `bnb_4bit_compute_dtype`.
        quant_storage_4: Reported as `bnb_4bit_quant_storage`.

    Returns:
        The model card as one Markdown string, starting with the YAML front
        matter and ending with a newline.
    """
    card_lines = [
        "---",
        "base_model:",
        f"- {model_name}",
        "---",
        "",
        f"# {model_name} (Quantized)",
        "",
        "## Description",
        f"This model is a quantized version of the original model `{model_name}`. "
        "It has been quantized using int4 quantization with bitsandbytes.",
        "",
        "## Quantization Details",
        "- **Quantization Type**: int4",
        f"- **bnb_4bit_quant_type**: {quant_type_4}",
        f"- **bnb_4bit_use_double_quant**: {double_quant_4}",
        f"- **bnb_4bit_compute_dtype**: {compute_type_4}",
        f"- **bnb_4bit_quant_storage**: {quant_storage_4}",
        "",
        "## Usage",
        "You can use this model in your applications by loading it directly from the Hugging Face Hub:",
        "```python",
        "from transformers import AutoModel",
        "",
        f'model = AutoModel.from_pretrained("{model_name}")',
        # Close the code fence; an unterminated fence leaves the README
        # with an open code block.
        "```",
        "",
    ]
    return "\n".join(card_lines)
61
+
62
def load_model(model_name, quantization_config, auth_token):
    """Load `model_name` through AutoModel on CPU with a quantization config.

    Args:
        model_name: Hub model id handed to `AutoModel.from_pretrained`.
        quantization_config: Forwarded unchanged as `quantization_config`.
        auth_token: Object exposing a `.token` attribute, or None for
            anonymous access.

    Returns:
        Whatever `AutoModel.from_pretrained` returns.
    """
    # `use_auth_token` is deprecated in transformers; `token` is its replacement.
    hub_token = auth_token.token if auth_token is not None else None
    return AutoModel.from_pretrained(
        model_name,
        quantization_config=quantization_config,
        device_map="cpu",
        token=hub_token,
    )
64
+
65
# Lookup from a dtype name (string) to the torch dtype object of the same name.
# quantize_model indexes it with the storage and compute dtype names.
DTYPE_MAPPING = {
    dtype_name: getattr(torch, dtype_name)
    for dtype_name in ("int8", "uint8", "float16", "float32", "bfloat16")
}
72
+
73
+
74
def quantize_model(model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, auth_token=None):
    """Load `model_name` with a 4-bit BitsAndBytesConfig.

    Args:
        model_name: Hub model id handed to `AutoModel.from_pretrained`.
        quant_type_4: Value passed as `bnb_4bit_quant_type`.
        double_quant_4: Double quantization is enabled when this is the
            string "True" (or the boolean True).
        compute_type_4: Key into DTYPE_MAPPING for `bnb_4bit_compute_dtype`.
        quant_storage_4: Key into DTYPE_MAPPING for `bnb_4bit_quant_storage`.
        auth_token: Object exposing `.token`, or None for anonymous access.

    Returns:
        The loaded model, after every Linear4bit module has been moved to
        CUDA and back to CPU.

    Raises:
        KeyError: If a dtype name is not present in DTYPE_MAPPING.
        RuntimeError: If the model contains Linear4bit modules but CUDA is
            not available.
    """
    print(f"Quantizing model: {quant_type_4}")
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type=quant_type_4,
        bnb_4bit_use_double_quant=str(double_quant_4) == "True",
        bnb_4bit_quant_storage=DTYPE_MAPPING[quant_storage_4],
        bnb_4bit_compute_dtype=DTYPE_MAPPING[compute_type_4],
    )

    # `use_auth_token` is deprecated in transformers; `token` is its replacement.
    hub_token = auth_token.token if auth_token is not None else None
    model = AutoModel.from_pretrained(
        model_name,
        quantization_config=quantization_config,
        device_map="cpu",
        token=hub_token,
    )

    linear_4bit_modules = [m for m in model.modules() if isinstance(m, Linear4bit)]
    if linear_4bit_modules and not torch.cuda.is_available():
        # Fail with an actionable message instead of the low-level error
        # that `.to("cuda")` would raise on a CPU-only host.
        raise RuntimeError(
            "A CUDA device is required to quantize the model with bitsandbytes, "
            "but none is available."
        )

    # NOTE(review): presumably the round trip through CUDA is what makes
    # bitsandbytes pack the weights - confirm against the bitsandbytes docs.
    for module in linear_4bit_modules:
        module.to("cuda")
        module.to("cpu")
    return model
90
+
91
def save_model(model, model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, username=None, auth_token=None, quantized_model_name=None, public=False):
    """Serialize `model`, write its model card and upload both to the Hub.

    Args:
        model: Object exposing `save_pretrained`.
        model_name: Source model id; its last path component is used to
            derive the default repository name.
        quant_type_4, double_quant_4, compute_type_4, quant_storage_4:
            Forwarded to `create_model_card`.
        username: Namespace under which the repository is created.
        auth_token: Object exposing `.token`, used for the Hub API client.
        quantized_model_name: Optional repository name overriding the
            default `<model>-BNB-INT4`.
        public: When False (default) the repository is created private.

    Returns:
        An HTML snippet linking to the uploaded repository.
    """
    print("Saving quantized model")
    if quantized_model_name:
        repo_name = f"{username}/{quantized_model_name}"
    else:
        repo_name = f"{username}/{model_name.split('/')[-1]}-BNB-INT4"

    with tempfile.TemporaryDirectory() as tmpdirname:
        # Saving to a local directory needs no Hub credentials, so the
        # deprecated `use_auth_token` keyword is not passed here.
        model.save_pretrained(tmpdirname, safe_serialization=True)

        # NOTE(review): the card is rendered from repo_name, so its
        # `base_model` field names the quantized repository rather than
        # `model_name` - confirm this is intended.
        model_card = create_model_card(repo_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4)
        # Explicit encoding: the platform default may not be UTF-8.
        with open(os.path.join(tmpdirname, "README.md"), "w", encoding="utf-8") as f:
            f.write(model_card)

        # Push to Hub
        api = HfApi(token=auth_token.token)
        api.create_repo(repo_name, exist_ok=True, private=not public)
        api.upload_folder(
            folder_path=tmpdirname,
            repo_id=repo_name,
            repo_type="model",
        )

    return "\n".join([
        "",
        '<div class="success-box">',
        "<h2>🎉 Quantization Complete!</h2>",
        "<p>Your quantized model is now available at:</p>",
        f'<a href="https://huggingface.co/{repo_name}" target="_blank" class="model-link">',
        f"huggingface.co/{repo_name}",
        "</a>",
        "</div>",
        "",
    ])
120
+
121
def _status_box(css_class, title, body):
    """Wrap `title` and `body` in the <div> markup used for status messages."""
    return "\n".join([
        "",
        f'<div class="{css_class}">',
        f"<h3>{title}</h3>",
        f"<p>{body}</p>",
        "</div>",
        "",
    ])


def quantize_and_save(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToken | None, model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, quantized_model_name, public):
    """Quantize a model and upload it, returning an HTML status snippet.

    Returns an "error-box" snippet when the user is not signed in, when no
    model id was given, when the existence lookup fails, or when quantizing
    or saving raises. Returns a "warning-box" snippet when the target
    repository already exists, and the result of `save_model` on success.
    """
    from html import escape  # function-scope import keeps this block self-contained

    if oauth_token is None or not profile:
        return _status_box(
            "error-box",
            "❌ Authentication Error",
            "Please sign in to your HuggingFace account to use the quantizer.",
        )

    if not model_name:
        return _status_box("error-box", "❌ Error Occurred", "Please select a model to quantize.")

    exists_message = check_model_exists(oauth_token, profile.username, model_name, quantized_model_name)
    if exists_message:
        # check_model_exists also returns a message when the lookup itself
        # fails; do not present that case as "already exists".
        if exists_message.startswith("Error checking model existence"):
            return _status_box("error-box", "❌ Error Occurred", escape(exists_message, quote=False))
        return _status_box("warning-box", "⚠️ Model Already Exists", escape(exists_message, quote=False))

    try:
        quantized_model = quantize_model(model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, oauth_token)
        return save_model(quantized_model, model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, profile.username, oauth_token, quantized_model_name, public)
    except Exception as e:
        print(e)
        # Exception text may contain markup characters such as "<";
        # escape it before embedding it in HTML.
        return _status_box("error-box", "❌ Error Occurred", escape(str(e), quote=False))
155
+
156
# Global stylesheet for the app, passed to gr.Blocks(css=css) below.
# It defines the colour/spacing variables in :root and the classes and ids
# referenced by this file's components and returned HTML (e.g. .main-container,
# .card-section, .success-box/.warning-box/.error-box, .model-link,
# #login-button, #toggle-button, #quantize-button).
+ css = """
157
+ :root {
158
+ --primary: #6366f1;
159
+ --primary-light: #818cf8;
160
+ --primary-dark: #4f46e5;
161
+ --secondary: #10b981;
162
+ --accent: #f97316;
163
+ --background: #f8fafc;
164
+ --text: #1e293b;
165
+ --card-bg: #ffffff;
166
+ --input-bg: #f1f5f9;
167
+ --error: #ef4444;
168
+ --warning: #f59e0b;
169
+ --success: #10b981;
170
+ --border-radius: 12px;
171
+ --shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06);
172
+ --transition: all 0.3s ease;
173
+ }
174
+
175
+ body, .gradio-container {
176
+ font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Open Sans', sans-serif;
177
+ color: var(--text);
178
+ background-color: var(--background);
179
+ }
180
+
181
+ h1 {
182
+ font-size: 2.5rem !important;
183
+ font-weight: 800 !important;
184
+ text-align: center;
185
+ background: linear-gradient(45deg, var(--primary), var(--accent));
186
+ -webkit-background-clip: text;
187
+ background-clip: text;
188
+ color: transparent !important;
189
+ margin-bottom: 1rem !important;
190
+ padding: 1rem 0 !important;
191
+ }
192
+
193
+ h2 {
194
+ font-size: 1.75rem !important;
195
+ font-weight: 700 !important;
196
+ color: var(--primary-dark) !important;
197
+ margin-top: 1.5rem !important;
198
+ margin-bottom: 1rem !important;
199
+ }
200
+
201
+ h3 {
202
+ font-size: 1.25rem !important;
203
+ font-weight: 600 !important;
204
+ color: var(--primary) !important;
205
+ margin-top: 1rem !important;
206
+ margin-bottom: 0.5rem !important;
207
+ border-bottom: 2px solid var(--primary-light);
208
+ padding-bottom: 0.5rem;
209
+ width: fit-content;
210
+ }
211
+
212
+ /* Main container styling */
213
+ .main-container {
214
+ max-width: 1200px;
215
+ margin: 0 auto;
216
+ padding: 2rem;
217
+ background-color: var(--card-bg);
218
+ border-radius: var(--border-radius);
219
+ box-shadow: var(--shadow);
220
+ }
221
+
222
+ /* Button styling */
223
+ button {
224
+ border-radius: var(--border-radius) !important;
225
+ font-weight: 600 !important;
226
+ transition: var(--transition) !important;
227
+ text-transform: uppercase;
228
+ letter-spacing: 0.5px;
229
+ }
230
+
231
+ button.primary {
232
+ background: linear-gradient(135deg, var(--primary), var(--primary-dark)) !important;
233
+ border: none !important;
234
+ color: white !important;
235
+ padding: 12px 24px !important;
236
+ box-shadow: 0 4px 6px -1px rgba(99, 102, 241, 0.4) !important;
237
+ }
238
+
239
+ button.primary:hover {
240
+ transform: translateY(-2px) !important;
241
+ box-shadow: 0 8px 15px -3px rgba(99, 102, 241, 0.5) !important;
242
+ }
243
+
244
+ /* Login button styling */
245
+ #login-button {
246
+ margin: 1.5rem auto !important;
247
+ min-width: 200px !important;
248
+ background: linear-gradient(135deg, var(--primary), var(--primary-dark)) !important;
249
+ color: white !important;
250
+ font-weight: 600 !important;
251
+ padding: 12px 24px !important;
252
+ border-radius: var(--border-radius) !important;
253
+ border: none !important;
254
+ box-shadow: 0 4px 6px -1px rgba(99, 102, 241, 0.4) !important;
255
+ transition: var(--transition) !important;
256
+ }
257
+
258
+ #login-button:hover {
259
+ transform: translateY(-2px) !important;
260
+ box-shadow: 0 8px 15px -3px rgba(99, 102, 241, 0.5) !important;
261
+ }
262
+
263
+ /* Toggle button styling */
264
+ #toggle-button {
265
+ background: transparent !important;
266
+ color: var(--primary) !important;
267
+ border: 2px solid var(--primary-light) !important;
268
+ padding: 8px 16px !important;
269
+ margin: 1rem 0 !important;
270
+ border-radius: var(--border-radius) !important;
271
+ transition: var(--transition) !important;
272
+ font-weight: 600 !important;
273
+ }
274
+
275
+ #toggle-button:hover {
276
+ background-color: var(--primary-light) !important;
277
+ color: white !important;
278
+ }
279
+
280
+ /* Input fields styling */
281
+ input, select, textarea {
282
+ border-radius: var(--border-radius) !important;
283
+ border: 2px solid var(--input-bg) !important;
284
+ padding: 10px 16px !important;
285
+ background-color: var(--input-bg) !important;
286
+ transition: var(--transition) !important;
287
+ }
288
+
289
+ input:focus, select:focus, textarea:focus {
290
+ border-color: var(--primary-light) !important;
291
+ box-shadow: 0 0 0 2px rgba(99, 102, 241, 0.2) !important;
292
+ }
293
+
294
+ /* Dropdown styling with nice hover effects */
295
+ .gradio-dropdown > div {
296
+ border-radius: var(--border-radius) !important;
297
+ border: 2px solid var(--input-bg) !important;
298
+ overflow: hidden !important;
299
+ transition: var(--transition) !important;
300
+ }
301
+
302
+ .gradio-dropdown > div:hover {
303
+ border-color: var(--primary-light) !important;
304
+ }
305
+
306
+ /* Radio and checkbox styling */
307
+ .gradio-radio, .gradio-checkbox {
308
+ background-color: var(--card-bg) !important;
309
+ border-radius: var(--border-radius) !important;
310
+ padding: 12px !important;
311
+ margin-bottom: 16px !important;
312
+ transition: var(--transition) !important;
313
+ border: 2px solid var(--input-bg) !important;
314
+ }
315
+
316
+ .gradio-radio:hover, .gradio-checkbox:hover {
317
+ border-color: var(--primary-light) !important;
318
+ }
319
+
320
+ .gradio-radio input[type="radio"] + label {
321
+ padding: 8px 12px !important;
322
+ border-radius: 20px !important;
323
+ margin-right: 8px !important;
324
+ background-color: var(--input-bg) !important;
325
+ transition: var(--transition) !important;
326
+ }
327
+
328
+ .gradio-radio input[type="radio"]:checked + label {
329
+ background-color: var(--primary) !important;
330
+ color: white !important;
331
+ }
332
+
333
+ /* Custom spacing and layout */
334
+ .gradio-row {
335
+ margin-bottom: 24px !important;
336
+ }
337
+
338
+ .option-row {
339
+ display: flex !important;
340
+ gap: 16px !important;
341
+ margin-bottom: 16px !important;
342
+ }
343
+
344
+ /* Card-like sections */
345
+ .card-section {
346
+ background-color: var(--card-bg) !important;
347
+ border-radius: var(--border-radius) !important;
348
+ padding: 20px !important;
349
+ margin-bottom: 24px !important;
350
+ box-shadow: var(--shadow) !important;
351
+ border: 1px solid rgba(0, 0, 0, 0.05) !important;
352
+ }
353
+
354
+ /* Search box styling */
355
+ .search-box input {
356
+ border-radius: var(--border-radius) !important;
357
+ border: 2px solid var(--input-bg) !important;
358
+ padding: 12px 20px !important;
359
+ box-shadow: var(--shadow) !important;
360
+ transition: var(--transition) !important;
361
+ }
362
+
363
+ .search-box input:focus {
364
+ border-color: var(--primary) !important;
365
+ box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.3) !important;
366
+ }
367
+
368
+ /* Model name textbox specific styling */
369
+ .model-name-textbox {
370
+ border: 2px solid var(--input-bg) !important;
371
+ border-radius: var(--border-radius) !important;
372
+ transition: var(--transition) !important;
373
+ }
374
+
375
+ .model-name-textbox:focus-within {
376
+ border-color: var(--primary) !important;
377
+ box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.3) !important;
378
+ }
379
+
380
+ /* Success, warning and error boxes */
381
+ .success-box, .warning-box, .error-box {
382
+ border-radius: var(--border-radius) !important;
383
+ padding: 20px !important;
384
+ margin: 20px 0 !important;
385
+ box-shadow: var(--shadow) !important;
386
+ animation: fadeIn 0.5s ease-in-out;
387
+ }
388
+
389
+ .success-box {
390
+ background-color: rgba(16, 185, 129, 0.1) !important;
391
+ border: 2px solid var(--success) !important;
392
+ }
393
+
394
+ .warning-box {
395
+ background-color: rgba(245, 158, 11, 0.1) !important;
396
+ border: 2px solid var(--warning) !important;
397
+ }
398
+
399
+ .error-box {
400
+ background-color: rgba(239, 68, 68, 0.1) !important;
401
+ border: 2px solid var(--error) !important;
402
+ }
403
+
404
+ /* Model link styling */
405
+ .model-link {
406
+ display: inline-block !important;
407
+ background: linear-gradient(135deg, var(--primary), var(--primary-dark)) !important;
408
+ color: white !important;
409
+ text-decoration: none !important;
410
+ padding: 12px 24px !important;
411
+ border-radius: var(--border-radius) !important;
412
+ font-weight: 600 !important;
413
+ margin-top: 16px !important;
414
+ box-shadow: 0 4px 6px -1px rgba(99, 102, 241, 0.4) !important;
415
+ transition: var(--transition) !important;
416
+ }
417
+
418
+ .model-link:hover {
419
+ transform: translateY(-2px) !important;
420
+ box-shadow: 0 8px 15px -3px rgba(99, 102, 241, 0.5) !important;
421
+ }
422
+
423
+ /* Instructions section */
424
+ .instructions-container {
425
+ background-color: rgba(99, 102, 241, 0.05) !important;
426
+ border-left: 4px solid var(--primary) !important;
427
+ padding: 16px !important;
428
+ margin: 24px 0 !important;
429
+ border-radius: 0 var(--border-radius) var(--border-radius) 0 !important;
430
+ }
431
+
432
+ /* Animations */
433
+ @keyframes fadeIn {
434
+ from { opacity: 0; transform: translateY(10px); }
435
+ to { opacity: 1; transform: translateY(0); }
436
+ }
437
+
438
+ /* Responsive adjustments */
439
+ @media (max-width: 768px) {
440
+ .option-row {
441
+ flex-direction: column !important;
442
+ }
443
+ }
444
+
445
+ /* Add a nice gradient splash to the app */
446
+ .gradio-container::before {
447
+ content: "";
448
+ position: absolute;
449
+ top: 0;
450
+ left: 0;
451
+ right: 0;
452
+ height: 10px;
453
+ background: linear-gradient(90deg, var(--primary), var(--accent));
454
+ z-index: 100;
455
+ }
456
+
457
+ /* Stylish header */
458
+ .app-header {
459
+ display: flex;
460
+ flex-direction: column;
461
+ align-items: center;
462
+ margin-bottom: 2rem;
463
+ position: relative;
464
+ }
465
+
466
+ .app-header::after {
467
+ content: "";
468
+ position: absolute;
469
+ bottom: -10px;
470
+ left: 50%;
471
+ transform: translateX(-50%);
472
+ width: 80px;
473
+ height: 4px;
474
+ background: linear-gradient(90deg, var(--primary), var(--accent));
475
+ border-radius: 2px;
476
+ }
477
+
478
+ /* Section headers */
479
+ .section-header {
480
+ display: flex;
481
+ align-items: center;
482
+ margin-bottom: 1rem;
483
+ }
484
+
485
+ .section-header::before {
486
+ content: "⚙️";
487
+ margin-right: 8px;
488
+ font-size: 1.25rem;
489
+ }
490
+
491
+ /* Quantize button special styling */
492
+ #quantize-button {
493
+ background: linear-gradient(135deg, var(--primary), var(--accent)) !important;
494
+ color: white !important;
495
+ padding: 16px 32px !important;
496
+ font-size: 1.1rem !important;
497
+ font-weight: 700 !important;
498
+ border: none !important;
499
+ border-radius: var(--border-radius) !important;
500
+ box-shadow: 0 4px 15px -3px rgba(99, 102, 241, 0.5) !important;
501
+ transition: all 0.3s cubic-bezier(0.25, 0.8, 0.25, 1) !important;
502
+ position: relative;
503
+ overflow: hidden;
504
+ }
505
+
506
+ #quantize-button:hover {
507
+ transform: translateY(-3px) !important;
508
+ box-shadow: 0 7px 20px -2px rgba(99, 102, 241, 0.6) !important;
509
+ }
510
+
511
+ #quantize-button::after {
512
+ content: "";
513
+ position: absolute;
514
+ top: 0;
515
+ left: 0;
516
+ width: 100%;
517
+ height: 100%;
518
+ background: linear-gradient(rgba(255, 255, 255, 0.2), rgba(255, 255, 255, 0));
519
+ transform: translateY(-100%);
520
+ transition: transform 0.6s cubic-bezier(0.25, 0.8, 0.25, 1);
521
+ }
522
+
523
+ #quantize-button:hover::after {
524
+ transform: translateY(0);
525
+ }
526
+ """
527
+
528
# Build the Gradio UI: header, login button, welcome message, collapsible
# instructions, the quantization/output settings form, an info panel, the
# quantize button with its HTML result area, and a footer.
# NOTE(review): leading indentation is not visible in this view, so the exact
# nesting of the `with` containers below must be confirmed against the real file.
+ with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="emerald"), css=css) as demo:
528
+ with gr.Column(elem_classes="main-container"):
529
+ with gr.Row(elem_classes="app-header"):
530
+ gr.Markdown(
531
+ """
532
+ <h1 style="text-align: center; margin-bottom: 1rem; font-size: 1.2rem; color: #4b5563;"> 🤗 BitsAndBytes Model Quantizer</h1>
533
+
534
+ <div style="text-align: center; margin-bottom: 1rem; font-size: 1.2rem; color: #4b5563;">
535
+ Welcome to the BitsAndBytes Model Quantizer!
536
+ </div>
537
+ """
538
+ )
539
+
540
+ gr.LoginButton(elem_id="login-button", elem_classes="login-button")
541
+
542
+ welcome_msg = gr.Markdown(elem_classes="welcome-message")
# On page load, fill the welcome message with the result of hello()
# (hello is defined elsewhere in this file).
543
+ demo.load(hello, inputs=None, outputs=welcome_msg)
544
+
# Usage instructions; created hidden (visible=False) and revealed by the
# toggle button below.
545
+ instructions = gr.Markdown(
546
+ """
547
+ <div class="instructions-container">
548
+ <h3>📋 Instructions</h3>
549
+ <ol>
550
+ <li>Login to your HuggingFace account</li>
551
+ <li>Enter the name of the Hugging Face LLM model you want to quantize</li>
552
+ <li>Configure quantization settings based on your needs</li>
553
+ <li>Optionally, specify a custom name for the quantized model</li>
554
+ <li>Click "Quantize Model" to start the process</li>
555
+ </ol>
556
+ <p><strong>Note:</strong> Processing time depends on model size and your hardware. Check container logs for progress!</p>
557
+ </div>
558
+ """,
559
+ visible=False
560
+ )
561
+
# State holding whether the instructions are currently shown; starts False
# to match visible=False above.
562
+ instructions_visible = gr.State(False)
563
+ toggle_button = gr.Button("▼ Show Instructions", elem_id="toggle-button", elem_classes="toggle-button")
564
+
# Flip the visibility flag and return three values, in the same order as the
# click outputs below: the Markdown visibility update, the new state value,
# and the new button label.
565
+ def toggle_instructions(instructions_visible):
566
+ new_visibility = not instructions_visible
567
+ new_label = "▲ Hide Instructions" if new_visibility else "▼ Show Instructions"
568
+ return gr.update(visible=new_visibility), new_visibility, gr.update(value=new_label)
569
+
570
+ toggle_button.click(toggle_instructions, instructions_visible, [instructions, instructions_visible, toggle_button])
571
+
572
+ with gr.Row(elem_classes="app-content"):
573
+ with gr.Column(scale=1, elem_classes="card-section"):
574
+ with gr.Row(elem_classes="search-section"):
575
+ model_name = HuggingfaceHubSearch(
576
+ label="🔍 Select Model",
577
+ placeholder=" Search for model on Huggingface Hub...",
578
+ search_type="model",
579
+ elem_classes="search-box"
580
+ )
581
+
582
+ with gr.Row(elem_classes="section-header"):
583
+ gr.Markdown("### Quantization Settings")
584
+
585
+ with gr.Column(elem_classes="settings-group"):
586
+ gr.Markdown("**Quantization Type**", elem_classes="setting-label")
587
+ quant_type_4 = gr.Dropdown(
588
+ choices=["fp4", "nf4"],
589
+ value="fp4",
590
+ label="Format",
591
+ info="The quantization data type in bnb.nn.Linear4Bit layers",
592
+ show_label=False
593
+ )
594
+
595
+ gr.Markdown("**Compute Settings**", elem_classes="setting-label")
596
+ compute_type_4 = gr.Dropdown(
597
+ choices=["float16", "bfloat16", "float32"],
598
+ value="float32",
599
+ label="Compute Type",
600
+ info="The compute dtype for matrix multiplication"
601
+ )
602
+
603
+ quant_storage_4 = gr.Dropdown(
604
+ choices=["float16", "float32", "int8", "uint8", "bfloat16"],
605
+ value="uint8",
606
+ label="Storage Type",
607
+ info="The storage type for quantized weights"
608
+ )
609
+
610
+ gr.Markdown("**Double Quantization**", elem_classes="setting-label")
# NOTE(review): the choices are the strings "False"/"True", not booleans;
# presumably converted downstream before use - confirm in quantize_model.
611
+ double_quant_4 = gr.Radio(
612
+ ["False", "True"],
613
+ label="Use Double Quantization",
614
+ info="Further compress model size with nested quantization",
615
+ value="False",
616
+ )
617
+
618
+ with gr.Row(elem_classes="section-header"):
619
+ gr.Markdown("### Output Settings")
620
+
621
+ with gr.Column(elem_classes="settings-group"):
622
+ quantized_model_name = gr.Textbox(
623
+ label="Custom Model Name (Optional)",
624
+ info="Leave blank to use default naming convention",
625
+ placeholder="my-quantized-model",
626
+ elem_classes="model-name-textbox"
627
+ )
628
+
629
+ public = gr.Checkbox(
630
+ label="Make model public",
631
+ info="If checked, your model will be publicly accessible on Hugging Face Hub",
632
+ value=False,
633
+ )
634
+
635
+ with gr.Column(scale=1, elem_classes="card-section"):
636
+ with gr.Row():
# Static informational panel (benefits and a short configuration guide).
# NOTE(review): the guide states fp4 is "better for most cases"; verify this
# wording against the bitsandbytes / transformers quantization docs.
637
+ gr.Markdown("""
638
+ ### 📊 Quantization Benefits
639
+
640
+ <div style="background-color: rgba(99, 102, 241, 0.05); padding: 12px; border-radius: 8px; margin-bottom: 16px;">
641
+ <p><strong>⚡ Lower Memory Usage:</strong> Reduce model size by up to 75%</p>
642
+ <p><strong>🚀 Faster Inference:</strong> Achieve better performance on resource-constrained hardware</p>
643
+ <p><strong>💻 Wider Compatibility:</strong> Run models on devices with limited VRAM</p>
644
+ </div>
645
+
646
+ ### 🔧 Configuration Guide
647
+
648
+ <div style="background-color: rgba(16, 185, 129, 0.05); padding: 12px; border-radius: 8px;">
649
+ <p><strong>Quantization Type:</strong></p>
650
+ <ul>
651
+ <li><code>fp4</code> - 4-bit floating point (better for most cases)</li>
652
+ <li><code>nf4</code> - normalized float format (better for specific models)</li>
653
+ </ul>
654
+ <p><strong>Double Quantization:</strong> Enable for additional compression with minimal quality loss</p>
655
+ </div>
656
+ """)
657
+
658
+ with gr.Row():
659
+ quantize_button = gr.Button("🚀 Quantize Model", variant="primary", elem_id="quantize-button")
660
+
# Receives the HTML string returned by quantize_and_save.
661
+ output_link = gr.HTML(label="Results", elem_classes="results-container")
662
+
663
+ # Add interactive footer with links
664
+ gr.Markdown("""
665
+ <div style="margin-top: 2rem; text-align: center; padding: 1rem; border-top: 1px solid rgba(99, 102, 241, 0.2);">
666
+ <p>Powered by <a href="https://huggingface.co/" target="_blank" style="color: var(--primary); text-decoration: none; font-weight: 600;">Hugging Face</a> and <a href="https://github.com/TimDettmers/bitsandbytes" target="_blank" style="color: var(--primary); text-decoration: none; font-weight: 600;">BitsAndBytes</a></p>
667
+ </div>
668
+ """)
669
+
# Wire the button to quantize_and_save. Only the seven form components are
# listed as inputs; the function's leading profile/oauth_token parameters are
# not - presumably Gradio injects them from the OAuth type annotations
# (confirm against the Gradio OAuth documentation).
670
+ quantize_button.click(
671
+ fn=quantize_and_save,
672
+ inputs=[model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, quantized_model_name, public],
673
+ outputs=[output_link]
674
+ )
676
+
677
# Start the Gradio server only when this file is executed as a script.
+ if __name__ == "__main__":
678
# NOTE(review): share=True asks Gradio for a public share link; confirm this
# is wanted in the deployment environment (it may be unnecessary when the app
# is hosted as a Hugging Face Space).
+ demo.launch(share=True)
requirements.txt CHANGED
@@ -2,4 +2,4 @@ transformers
2
  accelerate
3
  huggingface-hub
4
  gradio-huggingfacehub-search
5
- https://github.com/bitsandbytes-foundation/bitsandbytes/releases/download/continuous-release_multi-backend-refactor/bitsandbytes-0.44.1.dev0-py3-none-manylinux_2_24_x86_64.whl
 
2
  accelerate
3
  huggingface-hub
4
  gradio-huggingfacehub-search
5
+ bitsandbytes