John6666 commited on
Commit
c557532
·
verified ·
1 Parent(s): 441a0f2

Upload 6 files

Browse files
Files changed (6) hide show
  1. README.md +4 -3
  2. app.py +45 -0
  3. packages.txt +1 -0
  4. quantizer_gr.py +129 -0
  5. requirements.txt +8 -0
  6. utils.py +197 -0
README.md CHANGED
@@ -1,12 +1,13 @@
1
  ---
2
- title: Quantizer Alpha
3
- emoji: 📊
4
- colorFrom: green
5
  colorTo: pink
6
  sdk: gradio
7
  sdk_version: 5.0.2
8
  app_file: app.py
9
  pinned: false
 
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Quantizer Alpha (Does not work in CPU space)
3
+ emoji: 🦙🤗
4
+ colorFrom: gray
5
  colorTo: pink
6
  sdk: gradio
7
  sdk_version: 5.0.2
8
  app_file: app.py
9
  pinned: false
10
+ license: mit
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from quantizer_gr import quantize_gr, get_model_class
3
+
4
+ css = """
5
+ .title { font-size: 3em; align-items: center; text-align: center; }
6
+ .info { align-items: center; text-align: center; }
7
+ .block.result { margin: 1em 0; padding: 1em; box-shadow: 0 0 3px 3px #664422, 0 0 3px 2px #664422 inset; border-radius: 6px; background: #665544; }
8
+ .desc [src$='#float'] { float: right; margin: 20px; }
9
+ """
10
+
11
+ with gr.Blocks(theme="NoCrypt/miku@>=1.2.2", fill_width=True, css=css, delete_cache=(60, 3600)) as demo:
12
+ with gr.Column():
13
+ gr.Markdown("# Quantizer Alpha (Does not work in CPU space)", elem_classes="title")
14
+ with gr.Group():
15
+ with gr.Row():
16
+ repo_id = gr.Textbox(label="Repo ID", placeholder="author/model", value="", lines=1)
17
+ with gr.Column():
18
+ hf_token = gr.Textbox(label="Your HF write token", placeholder="hf_...", value="", max_lines=1)
19
+ gr.Markdown("Your token is available at [hf.co/settings/tokens](https://huggingface.co/settings/tokens).", elem_classes="info")
20
+ with gr.Row():
21
+ newrepo_id = gr.Textbox(label="Upload repo ID", placeholder="yourid/newrepo", value="", max_lines=1)
22
+ is_private = gr.Checkbox(label="Create private repo", value=True)
23
+ is_overwrite = gr.Checkbox(label="Overwrite repo", value=False)
24
+ with gr.Accordion("Advanced", open=False):
25
+ with gr.Row():
26
+ qtype = gr.Radio(label="Quantization algorithm", choices=["nf4"], value="nf4")
27
+ dtype = gr.Radio(label="Computation data type", choices=["fp16", "fp32", "bf16", "fp8", "default"], value="bf16")
28
+ mclass = gr.Radio(label="Model class", choices=get_model_class(), value=get_model_class()[0])
29
+ run_button = gr.Button(value="Run", variant="primary")
30
+ with gr.Group():
31
+ uploaded_urls = gr.CheckboxGroup(visible=False, choices=[], value=[]) # hidden
32
+ urls_md = gr.Markdown("<br><br>", elem_classes="result", visible=True)
33
+ clear_button = gr.Button(value="Clear Output", variant="secondary")
34
+ gr.DuplicateButton(value="Duplicate Space")
35
+
36
+ gr.on(
37
+ triggers=[run_button.click],
38
+ fn=quantize_gr,
39
+ inputs=[repo_id, hf_token, uploaded_urls, newrepo_id, is_private, is_overwrite, dtype, qtype, mclass],
40
+ outputs=[uploaded_urls, urls_md],
41
+ )
42
+ clear_button.click(lambda: ([], "<br><br>"), None, [uploaded_urls, urls_md], queue=False, show_api=False)
43
+
44
+ demo.queue()
45
+ demo.launch()
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ git-lfs aria2
quantizer_gr.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
if os.environ.get("SPACES_ZERO_GPU") is not None:
    import spaces
else:
    import functools

    class spaces:
        """Minimal stand-in for the `spaces` package, used when SPACES_ZERO_GPU is unset."""

        @staticmethod
        def GPU(func=None, **_options):
            """No-op replacement for the `spaces.GPU` decorator.

            Usable both bare (`@spaces.GPU`) and called with keyword options
            (`@spaces.GPU(duration=60)`); any options are ignored.
            The returned wrapper simply forwards the call to the decorated function.
            """
            def decorate(target):
                # functools.wraps keeps the name, docstring and signature of the
                # decorated function visible to introspection.
                @functools.wraps(target)
                def wrapper(*args, **kwargs):
                    return target(*args, **kwargs)
                return wrapper

            if func is None:
                # Called as @spaces.GPU(...): hand back the real decorator.
                return decorate
            return decorate(func)
11
+ import gradio as gr
12
+ from pathlib import Path
13
+ import gc
14
+ import shutil
15
+ import torch
16
+ from utils import set_token, upload_repo, is_repo_exists, is_repo_name
17
+ from transformers import AutoTokenizer, AutoModelForCausalLM
18
+ from transformers import BitsAndBytesConfig
19
+
20
+
21
# NOTE(review): presumably present so that a `spaces.GPU`-decorated function
# exists at import time - confirm against the ZeroGPU requirements.
@spaces.GPU
def fake_gpu():
    """Do nothing."""
24
+
25
+
26
# Maps a model-class name to its [model loader class, tokenizer loader class] pair.
MODEL_CLASS = dict(
    AutoModelForCausalLM=[AutoModelForCausalLM, AutoTokenizer],
)
29
+
30
+
31
+ DTYPE_DICT = {
32
+ "fp16": torch.float16,
33
+ "bf16": torch.bfloat16,
34
+ "fp32": torch.float32,
35
+ "fp8": torch.float8_e4m3fn
36
+ }
37
+
38
+
39
def get_model_class():
    """Return the names of all registered model classes, in registration order."""
    return [*MODEL_CLASS]
41
+
42
+
43
def get_model(mclass: str):
    """Return the model loader class for `mclass` (AutoModelForCausalLM if unknown)."""
    model_cls, _tokenizer_cls = MODEL_CLASS.get(mclass, [AutoModelForCausalLM, AutoTokenizer])
    return model_cls
45
+
46
+
47
def get_tokenizer(mclass: str):
    """Return the tokenizer loader class for `mclass` (AutoTokenizer if unknown)."""
    _model_cls, tokenizer_cls = MODEL_CLASS.get(mclass, [AutoModelForCausalLM, AutoTokenizer])
    return tokenizer_cls
49
+
50
+
51
def get_dtype(dtype: str):
    """Return the torch dtype registered under `dtype`, or torch.bfloat16 if unknown."""
    if dtype in DTYPE_DICT:
        return DTYPE_DICT[dtype]
    return torch.bfloat16
53
+
54
+
55
def save_readme_md(dir, repo_id):
    """Write a model-card README.md into `dir` that credits `repo_id` as the base model.

    Args:
        dir: Folder that receives README.md (must already exist).
        repo_id: Hub repo ID of the original model, e.g. "author/model".
    """
    source_link = f"https://huggingface.co/{repo_id}/"
    card = f"""---
license: other
language:
- en
library_name: transformers
base_model: {repo_id}
tags:
- transformers
---
Quants of [{repo_id}]({source_link}).
"""
    Path(dir, "README.md").write_text(card, encoding="utf-8")
72
+
73
+
74
@spaces.GPU
def quantize_repo(repo_id: str, dtype: str="bf16", qtype: str="nf4", mclass: str=get_model_class()[0], progress=gr.Progress(track_tqdm=True)):
    """Load `repo_id` with the requested quantization and save the result locally.

    Args:
        repo_id: Hub repo ID of the model to quantize.
        dtype: Key of DTYPE_DICT, or "default" to not pass `torch_dtype` when loading.
        qtype: Quantization algorithm; only "nf4" adds a quantization config.
        mclass: Key of MODEL_CLASS selecting the model/tokenizer loader classes.
        progress: Gradio progress reporter.

    Returns:
        The output folder name: the last path component of `repo_id`,
        relative to the current working directory.
    """
    progress(0, desc="Start quantizing...")
    out_dir = repo_id.split("/")[-1]

    # get_dtype() falls back to bfloat16 for "default" and unknown names, so the
    # quantization config always receives a concrete dtype.
    torch_dtype = get_dtype(dtype)
    type_kwargs = {}
    if dtype != "default":
        type_kwargs["torch_dtype"] = torch_dtype

    quant_kwargs = {}
    if qtype == "nf4":
        quant_kwargs["quantization_config"] = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_quant_storage=torch_dtype,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_compute_dtype=torch_dtype,
        )

    progress(0.1, desc="Loading...")
    # Fixed: the keyword was misspelled "legathy", so the intended `legacy=False`
    # option never reached the tokenizer.
    tokenizer = get_tokenizer(mclass).from_pretrained(repo_id, legacy=False)
    model = get_model(mclass).from_pretrained(repo_id, **type_kwargs, **quant_kwargs)

    progress(0.5, desc="Saving...")
    tokenizer.save_pretrained(out_dir)
    model.save_pretrained(out_dir, safe_serialization=True)

    if Path(out_dir).exists():
        save_readme_md(out_dir, repo_id)

    # Drop the references before asking torch/gc to release memory.
    del tokenizer
    del model
    torch.cuda.empty_cache()
    gc.collect()

    progress(1, desc="Quantized.")
    return out_dir
104
+
105
def quantize_gr(repo_id: str, hf_token: str, urls: list[str], newrepo_id: str, is_private: bool=True, is_overwrite: bool=False,
                dtype: str="bf16", qtype: str="nf4", mclass: str=get_model_class()[0], progress=gr.Progress(track_tqdm=True)):
    """Gradio handler: quantize `repo_id`, upload the result to `newrepo_id`, report the URL.

    Args:
        repo_id: Hub repo ID of the model to quantize.
        hf_token: HF write token; falls back to the HF_TOKEN environment variable.
        urls: URLs of previously uploaded repos (may be empty or None).
        newrepo_id: Target repo ID; falls back to the HF_OUTPUT_REPO environment variable.
        is_private: Passed to upload_repo() as the private flag.
        is_overwrite: When False, an already existing target repo is an error.
        dtype, qtype, mclass: Forwarded to quantize_repo().
        progress: Gradio progress reporter.

    Returns:
        A pair of Gradio updates: (URL list with choices, Markdown listing the URLs).

    Raises:
        gr.Error: On a missing token, invalid repo names, an existing target
            repo when overwriting is disabled, or a failed upload.
    """
    if not hf_token: hf_token = os.environ.get("HF_TOKEN") # default huggingface token
    if not hf_token: raise gr.Error("HF write token is required for this process.")
    set_token(hf_token)
    if not newrepo_id: newrepo_id = os.environ.get("HF_OUTPUT_REPO") # default repo id
    # Check for empty/None first: the regex-based validator cannot take None.
    if not repo_id or not is_repo_name(repo_id): raise gr.Error(f"Invalid repo name: {repo_id}")
    if not newrepo_id or not is_repo_name(newrepo_id): raise gr.Error(f"Invalid repo name: {newrepo_id}")
    if not is_overwrite and is_repo_exists(newrepo_id): raise gr.Error(f"Repo already exists: {newrepo_id}")
    progress(0, desc="Start quantizing...")
    new_path = quantize_repo(repo_id, dtype, qtype, mclass)
    if not urls: urls = []
    # Two outputs are wired to this handler, so always return two values.
    if not new_path: return gr.update(value=urls, choices=urls), gr.update()
    progress(0.5, desc="Start uploading...")
    try:
        repo_url = upload_repo(newrepo_id, new_path, is_private)
    finally:
        # Remove the local copy whether or not the upload succeeded.
        shutil.rmtree(new_path, ignore_errors=True)
    progress(1, desc="Processing...")
    # upload_repo() returns "" on failure; an empty URL would break the link
    # labels built below, so report the failure instead.
    if not repo_url: raise gr.Error(f"Failed to upload to {newrepo_id}.")
    urls.append(repo_url)
    md = "### Your new repo:\n"
    for u in urls:
        label = "/".join(str(u).split("/")[-2:])  # "owner/name" part of the URL
        md += f"[{label}]({str(u)})<br>"
    torch.cuda.empty_cache()
    gc.collect()
    return gr.update(value=urls, choices=urls), gr.update(value=md)
129
+
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ huggingface-hub
2
+ gdown
3
+ safetensors
4
+ torch
5
+ transformers==4.44.0
6
+ bitsandbytes
7
+ peft
8
+ accelerate
utils.py ADDED
@@ -0,0 +1,197 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from huggingface_hub import HfApi, HfFolder, hf_hub_download, snapshot_download
3
+ import os
4
+ from pathlib import Path
5
+ import shutil
6
+ import gc
7
+ import re
8
+ import urllib.parse
9
+
10
+
11
def get_token():
    """Return the token reported by HfFolder.get_token(), or "" if that call raises."""
    try:
        return HfFolder.get_token()
    except Exception:
        return ""
17
+
18
+
19
def set_token(token):
    """Save `token` via HfFolder.save_token(); print an error instead of raising on failure."""
    try:
        HfFolder.save_token(token)
    except Exception:
        failure_message = "Error: Failed to save token."
        print(failure_message)
24
+
25
+
26
def get_user_agent():
    """Return the fixed browser User-Agent string used by this module."""
    platform = "Windows NT 10.0; Win64; x64; rv:127.0"
    return f"Mozilla/5.0 ({platform}) Gecko/20100101 Firefox/127.0"
28
+
29
+
30
def is_repo_exists(repo_id: str, repo_type: str="model"):
    """Return True if the repo exists on the Hub.

    Also returns True when the lookup itself fails, so that callers treat an
    unverifiable repo as existing.
    """
    token = get_token()
    client = HfApi(token=token)
    try:
        found = client.repo_exists(repo_id=repo_id, repo_type=repo_type, token=token)
    except Exception as e:
        print(f"Error: Failed to connect {repo_id} ({repo_type}). {e}")
        return True # for safe
    return True if found else False
39
+
40
+
41
# Maps a Hub model tag to the short model-type label returned by get_model_type().
MODEL_TYPE_CLASS = dict([
    ("diffusers:StableDiffusionPipeline", "SD 1.5"),
    ("diffusers:StableDiffusionXLPipeline", "SDXL"),
    ("diffusers:FluxPipeline", "FLUX"),
])
46
+
47
+
48
def get_model_type(repo_id: str):
    """Classify a Hub repo as "LoRA", "None", or a label from MODEL_TYPE_CLASS.

    Returns "LoRA" when the repo holds pytorch_lora_weights.safetensors, "None"
    when it has no model_index.json, otherwise the label of the first repo tag
    found in MODEL_TYPE_CLASS. Falls back to "SDXL" when no tag matches or any
    Hub call raises.
    """
    token = get_token()
    client = HfApi(token=token)
    fallback = "SDXL"
    try:
        if client.file_exists(repo_id=repo_id, filename="pytorch_lora_weights.safetensors", token=token):
            return "LoRA"
        if not client.file_exists(repo_id=repo_id, filename="model_index.json", token=token):
            return "None"
        info = client.model_info(repo_id=repo_id, token=token)
        return next((MODEL_TYPE_CLASS[t] for t in info.tags if t in MODEL_TYPE_CLASS), fallback)
    except Exception:
        return fallback
64
+
65
+
66
def list_uniq(l):
    """Return the items of `l` without duplicates, keeping first-occurrence order.

    Uses dict key ordering instead of sorting a set by `l.index`, which did a
    linear scan per element (quadratic overall) for the same result.
    """
    return list(dict.fromkeys(l))
68
+
69
+
70
def list_sub(a, b):
    """Return the items of `a` that are not in `b`, preserving order and duplicates."""
    remaining = []
    for item in a:
        if item in b:
            continue
        remaining.append(item)
    return remaining
72
+
73
+
74
def is_repo_name(s):
    """Check whether `s` looks like a Hub repo ID of the form "owner/name".

    Both parts must be non-empty and free of slashes, commas, whitespace and quotes.

    Returns:
        The `re.Match` object (truthy) on success, otherwise None. Non-string
        input such as None also yields None instead of raising TypeError.
    """
    if not isinstance(s, str):
        return None
    return re.fullmatch(r'^[^/,\s\"\']+/[^/,\s\"\']+$', s)
76
+
77
+
78
def split_hf_url(url: str):
    """Split a huggingface.co file URL into its components.

    Returns:
        (repo_id, filename, subfolder, repo_type). `subfolder` is None when the
        file sits at the repo root; `repo_type` is "dataset" for /datasets/ URLs
        and "model" otherwise. Subfolder and filename are URL-unquoted.
        When `url` is not a string or does not match, ("", "", "", "") is
        returned so callers can always unpack four values.
    """
    failed = ("", "", "", "")
    if not isinstance(url, str):
        return failed
    matched = re.match(r'^(?:https?://huggingface.co/)(?:(datasets)/)?(.+?/.+?)/\w+?/.+?/(?:(.+)/)?(.+?.\w+)(?:\?download=true)?$', url)
    if matched is None:
        return failed
    dataset_marker, repo_id, raw_subfolder, raw_filename = matched.groups()
    repo_type = "dataset" if dataset_marker == "datasets" else "model"
    subfolder = urllib.parse.unquote(raw_subfolder) if raw_subfolder else None
    filename = urllib.parse.unquote(raw_filename)
    return repo_id, filename, subfolder, repo_type
89
+
90
+
91
def download_hf_file(directory, url, progress=gr.Progress(track_tqdm=True)):
    """Download the single file addressed by a huggingface.co URL into `directory`.

    Failures (unparsable URL, download error) are printed rather than raised.

    Args:
        directory: Local target folder, passed to hf_hub_download as `local_dir`.
        url: huggingface.co file URL, parsed by split_hf_url().
        progress: Gradio progress reporter.
    """
    hf_token = get_token()
    parsed = split_hf_url(url)
    # split_hf_url() may yield nothing usable for URLs it cannot parse; check
    # before unpacking so a bad URL is reported instead of crashing.
    if not parsed or not parsed[0]:
        print(f"Failed to download: {url}")
        return
    repo_id, filename, subfolder, repo_type = parsed
    download_kwargs = dict(repo_id=repo_id, filename=filename, repo_type=repo_type, local_dir=directory, token=hf_token)
    if subfolder:
        download_kwargs["subfolder"] = subfolder
    try:
        hf_hub_download(**download_kwargs)
    except Exception as e:
        print(f"Failed to download: {e}")
99
+
100
+
101
def download_thing(directory, url, civitai_api_key="", progress=gr.Progress(track_tqdm=True)): # requires aria2, gdown
    """Download `url` into `directory`, choosing the tool by host.

    - drive.google.com: `gdown --fuzzy`, run inside `directory`.
    - huggingface.co: download_hf_file(), after normalising /blob/ to /resolve/.
    - civitai.com: `aria2c` with the API key appended as `?token=`; skipped with a
      printed message when no key is given.
    - anything else: `aria2c`.

    External tools are invoked with argument lists (no shell), so characters
    such as `&` or `;` in the URL are passed through literally instead of being
    interpreted by a shell. Tool failures are printed, not raised.
    """
    import subprocess

    aria2c = ["aria2c", "--console-log-level=error", "--summary-interval=10", "-c", "-x", "16", "-k", "1M", "-s", "16"]

    def run(cmd, cwd=None):
        # Best effort, like the previous os.system() calls: a missing binary or a
        # non-zero exit status must not raise.
        try:
            subprocess.run(cmd, cwd=cwd, check=False)
        except OSError as e:
            print(f"Failed to run {cmd[0]}: {e}")

    url = url.strip()
    if "drive.google.com" in url:
        # cwd= confines the directory change to the child process.
        run(["gdown", "--fuzzy", url], cwd=directory)
    elif "huggingface.co" in url:
        url = url.replace("?download=true", "")
        if "/blob/" in url:
            url = url.replace("/blob/", "/resolve/")
        download_hf_file(directory, url)
    elif "civitai.com" in url:
        if "?" in url:
            url = url.split("?")[0]
        if civitai_api_key:
            url = url + f"?token={civitai_api_key}"
            run([*aria2c, "-d", str(directory), url])
        else:
            print("You need an API key to download Civitai models.")
    else:
        run([*aria2c, "-d", str(directory), url])
129
+
130
+
131
def get_local_model_list(dir_path):
    """Return the paths (as strings) of model files found recursively under `dir_path`.

    A file counts as a model file when its final suffix is in `valid_extensions`.
    """
    valid_extensions = ('.safetensors', '.fp16.safetensors', '.sft')
    return [
        str(candidate)
        for candidate in Path(dir_path).glob("**/*.*")
        if candidate.is_file() and candidate.suffix in valid_extensions
    ]
139
+
140
+
141
def get_download_file(temp_dir, url, civitai_key, progress=gr.Progress(track_tqdm=True)):
    """Resolve `url` to something usable locally, downloading it if necessary.

    Resolution order:
      1. Not an http URL, looks like a repo ID and is not a local path: returned as is.
      2. Not an http URL and an existing local path: returned as is.
      3. A file with the URL's last path component already exists in `temp_dir`: that path.
      4. Otherwise the URL is downloaded into `temp_dir` and the first newly
         appeared model file is returned.

    Returns:
        The resolved repo ID or file path, or "" when the download failed.
    """
    is_http = "http" in url
    cached_path = f"{temp_dir}/{url.split('/')[-1]}"
    if not is_http and is_repo_name(url) and not Path(url).exists():
        print(f"Use HF Repo: {url}")
        new_file = url
    elif not is_http and Path(url).exists():
        print(f"Use local file: {url}")
        new_file = url
    elif Path(cached_path).exists():
        print(f"File to download already exists: {url}")
        new_file = cached_path
    else:
        print(f"Start downloading: {url}")
        before = get_local_model_list(temp_dir)
        try:
            download_thing(temp_dir, url.strip(), civitai_key)
        except Exception:
            print(f"Download failed: {url}")
            return ""
        # Whatever model file appeared during the download is the result.
        added = list_sub(get_local_model_list(temp_dir), before)
        new_file = added[0] if added else ""
    if not new_file:
        print(f"Download failed: {url}")
        return ""
    print(f"Download completed: {url}")
    return new_file
166
+
167
+
168
+ # https://huggingface.co/docs/huggingface_hub/v0.25.1/en/package_reference/file_download#huggingface_hub.snapshot_download
169
def download_repo(repo_id, dir_path, progress=gr.Progress(track_tqdm=True)):
    """Download the weight files of `repo_id` into `dir_path` via snapshot_download.

    Returns:
        True on success; False after printing and raising a Gradio warning on failure.
    """
    wanted = ["*.safetensors", "*.bin"]
    unwanted = ["*.fp16.*", "/*.safetensors", "/*.bin"]
    try:
        snapshot_download(repo_id=repo_id, local_dir=dir_path, token=get_token(), allow_patterns=wanted,
                          ignore_patterns=unwanted, force_download=True)
    except Exception as e:
        failure = f"Error: Failed to download {repo_id}. {e}"
        print(failure)
        gr.Warning(failure)
        return False
    return True
179
+
180
+
181
def upload_repo(new_repo_id, dir_path, is_private, progress=gr.Progress(track_tqdm=True)):
    """Create `new_repo_id` if needed and upload every top-level entry of `dir_path` to it.

    Sub-folders are uploaded with upload_folder, files with upload_file, each
    under its own name at the repo root.

    Returns:
        The repo URL on success, or "" after printing the error on failure.
    """
    token = get_token()
    client = HfApi(token=token)
    try:
        progress(0, desc="Start uploading...")
        client.create_repo(repo_id=new_repo_id, token=token, private=is_private, exist_ok=True)
        for entry in Path(dir_path).glob("*"):
            if entry.is_dir():
                client.upload_folder(repo_id=new_repo_id, folder_path=str(entry), path_in_repo=entry.name, token=token)
                continue
            if entry.is_file():
                client.upload_file(repo_id=new_repo_id, path_or_fileobj=str(entry), path_in_repo=entry.name, token=token)
        progress(1, desc="Uploaded.")
        return f"https://huggingface.co/{new_repo_id}"
    except Exception as e:
        print(f"Error: Failed to upload to {new_repo_id}. {e}")
        return ""