quantizer

Build error

App Files Files Community

John6666 committed on Oct 14, 2024

Commit

c557532

verified ·

1 Parent(s): 441a0f2

Upload 6 files

Browse files

Files changed (6) hide show

README.md +4 -3
app.py +45 -0
packages.txt +1 -0
quantizer_gr.py +129 -0
requirements.txt +8 -0
utils.py +197 -0

README.md CHANGED Viewed

@@ -1,12 +1,13 @@
 ---
-title: Quantizer Alpha
-emoji: 📊
-colorFrom: green
 colorTo: pink
 sdk: gradio
 sdk_version: 5.0.2
 app_file: app.py
 pinned: false
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: Quantizer Alpha (Does not work in CPU space)
+emoji: 🦙🤗
+colorFrom: gray
 colorTo: pink
 sdk: gradio
 sdk_version: 5.0.2
 app_file: app.py
 pinned: false
+license: mit
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py ADDED Viewed

	@@ -0,0 +1,45 @@

+import gradio as gr
+from quantizer_gr import quantize_gr, get_model_class
+css = """
+.title { font-size: 3em; align-items: center; text-align: center; }
+.info { align-items: center; text-align: center; }
+.block.result { margin: 1em 0; padding: 1em; box-shadow: 0 0 3px 3px #664422, 0 0 3px 2px #664422 inset; border-radius: 6px; background: #665544; }
+.desc [src$='#float'] { float: right; margin: 20px; }
+"""
+# Build the single-page UI: inputs, a Run button, and an output area listing uploaded repos.
+with gr.Blocks(theme="NoCrypt/miku@>=1.2.2", fill_width=True, css=css, delete_cache=(60, 3600)) as demo:
+    with gr.Column():
+        gr.Markdown("# Quantizer Alpha (Does not work in CPU space)", elem_classes="title")
+        with gr.Group():
+            # Source repo and the token used for the upload.
+            with gr.Row():
+                repo_id = gr.Textbox(label="Repo ID", placeholder="author/model", value="", lines=1)
+                with gr.Column():
+                    hf_token = gr.Textbox(label="Your HF write token", placeholder="hf_...", value="", max_lines=1)
+                    gr.Markdown("Your token is available at [hf.co/settings/tokens](https://huggingface.co/settings/tokens).", elem_classes="info")
+            # Destination repo and its creation options.
+            with gr.Row():
+                newrepo_id = gr.Textbox(label="Upload repo ID", placeholder="yourid/newrepo", value="", max_lines=1)
+                is_private = gr.Checkbox(label="Create private repo", value=True)
+                is_overwrite = gr.Checkbox(label="Overwrite repo", value=False)
+            # Quantization settings; the model-class choices come from quantizer_gr.get_model_class().
+            with gr.Accordion("Advanced", open=False):
+                with gr.Row():
+                    qtype = gr.Radio(label="Quantization algorithm", choices=["nf4"], value="nf4")
+                    dtype = gr.Radio(label="Computation data type", choices=["fp16", "fp32", "bf16", "fp8", "default"], value="bf16")
+                    mclass = gr.Radio(label="Model class", choices=get_model_class(), value=get_model_class()[0])
+        run_button = gr.Button(value="Run", variant="primary")
+        with gr.Group():
+            # Invisible component that carries the list of uploaded URLs between runs.
+            uploaded_urls = gr.CheckboxGroup(visible=False, choices=[], value=[]) # hidden
+            urls_md = gr.Markdown("<br><br>", elem_classes="result", visible=True)
+            clear_button = gr.Button(value="Clear Output", variant="secondary")
+    gr.DuplicateButton(value="Duplicate Space")
+    # Run: quantize and upload, then refresh both the hidden URL list and the visible Markdown.
+    gr.on(
+        triggers=[run_button.click],
+        fn=quantize_gr,
+        inputs=[repo_id, hf_token, uploaded_urls, newrepo_id, is_private, is_overwrite, dtype, qtype, mclass],
+        outputs=[uploaded_urls, urls_md],
+    )
+    # Clear: reset the URL list and the Markdown placeholder without going through the queue.
+    clear_button.click(lambda: ([], "<br><br>"), None, [uploaded_urls, urls_md], queue=False, show_api=False)
+demo.queue()
+demo.launch()

packages.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ git-lfs aria2

quantizer_gr.py ADDED Viewed

	@@ -0,0 +1,129 @@

+import os
+if os.environ.get("SPACES_ZERO_GPU") is not None:
+    import spaces
+else:
+    class spaces:
+        @staticmethod
+        def GPU(func):
+            def wrapper(*args, **kwargs):
+                return func(*args, **kwargs)
+            return wrapper
+import gradio as gr
+from pathlib import Path
+import gc
+import shutil
+import torch
+from utils import set_token, upload_repo, is_repo_exists, is_repo_name
+from transformers import AutoTokenizer, AutoModelForCausalLM
+from transformers import BitsAndBytesConfig
+@spaces.GPU
+def fake_gpu():
+    """Do nothing; only defines a `spaces.GPU`-decorated function at import time.
+
+    NOTE(review): presumably a placeholder for ZeroGPU start-up detection — confirm.
+    """
+    pass
+# Maps a model-class name to its [model loader, tokenizer loader] pair
+# (index 0 is read by get_model(), index 1 by get_tokenizer()).
+MODEL_CLASS = {
+    "AutoModelForCausalLM": [AutoModelForCausalLM, AutoTokenizer],
+}
+# Maps a short dtype name to the corresponding torch dtype (read by get_dtype()).
+DTYPE_DICT = {
+    "fp16": torch.float16,
+    "bf16": torch.bfloat16,
+    "fp32": torch.float32,
+    "fp8": torch.float8_e4m3fn
+}
+def get_model_class():
+    return list(MODEL_CLASS.keys())
+def get_model(mclass: str):
+    return MODEL_CLASS.get(mclass, [AutoModelForCausalLM, AutoTokenizer])[0]
+def get_tokenizer(mclass: str):
+    return MODEL_CLASS.get(mclass, [AutoModelForCausalLM, AutoTokenizer])[1]
+def get_dtype(dtype: str):
+    return DTYPE_DICT.get(dtype, torch.bfloat16)
+def save_readme_md(dir, repo_id):
+    orig_name = repo_id
+    orig_url = f"https://huggingface.co/{repo_id}/"
+    md = f"""---
+license: other
+language:
+- en
+library_name: transformers
+base_model: {repo_id}
+tags:
+- transformers
+---
+Quants of [{orig_name}]({orig_url}).
+"""
+    path = str(Path(dir, "README.md"))
+    with open(path, mode='w', encoding="utf-8") as f:
+        f.write(md)
+@spaces.GPU
+def quantize_repo(repo_id: str, dtype: str="bf16", qtype: str="nf4", mclass: str=get_model_class()[0], progress=gr.Progress(track_tqdm=True)):
+    progress(0, desc="Start quantizing...")
+    out_dir = repo_id.split("/")[-1]
+    type_kwargs = {}
+    if dtype != "default": type_kwargs["torch_dtype"] = get_dtype(dtype)
+    nf4_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_quant_storage=get_dtype(dtype),
+                                    bnb_4bit_use_double_quant=True, bnb_4bit_compute_dtype=get_dtype(dtype))
+    quant_kwargs = {}
+    if qtype == "nf4": quant_kwargs["quantization_config"] = nf4_config
+    progress(0.1, desc="Loading...")
+    tokenizer = get_tokenizer(mclass).from_pretrained(repo_id, legathy=False)
+    model = get_model(mclass).from_pretrained(repo_id, **type_kwargs, **quant_kwargs)
+    progress(0.5, desc="Saving...")
+    tokenizer.save_pretrained(out_dir)
+    model.save_pretrained(out_dir, safe_serialization=True)
+    if Path(out_dir).exists(): save_readme_md(out_dir, repo_id)
+    del tokenizer
+    del model
+    torch.cuda.empty_cache()
+    gc.collect()
+    progress(1, desc="Quantized.")
+    return out_dir
+def quantize_gr(repo_id: str, hf_token: str, urls: list[str], newrepo_id: str, is_private: bool=True, is_overwrite: bool=False,
+                dtype: str="bf16", qtype: str="nf4", mclass: str=get_model_class()[0], progress=gr.Progress(track_tqdm=True)):
+    if not hf_token: hf_token = os.environ.get("HF_TOKEN") # default huggingface token
+    if not hf_token: raise gr.Error("HF write token is required for this process.")
+    set_token(hf_token)
+    if not newrepo_id: newrepo_id = os.environ.get("HF_OUTPUT_REPO") # default repo id
+    if not is_repo_name(repo_id): raise gr.Error(f"Invalid repo name: {repo_id}")
+    if not is_repo_name(newrepo_id): raise gr.Error(f"Invalid repo name: {newrepo_id}")
+    if not is_overwrite and is_repo_exists(newrepo_id): raise gr.Error(f"Repo already exists: {newrepo_id}")
+    progress(0, desc="Start quantizing...")
+    new_path = quantize_repo(repo_id, dtype, qtype, mclass)
+    if not new_path: return ""
+    if not urls: urls = []
+    progress(0.5, desc="Start uploading...")
+    repo_url = upload_repo(newrepo_id, new_path, is_private)
+    progress(1, desc="Processing...")
+    shutil.rmtree(new_path)
+    urls.append(repo_url)
+    md = "### Your new repo:\n"
+    for u in urls:
+        md += f"[{str(u).split('/')[-2]}/{str(u).split('/')[-1]}]({str(u)})<br>"
+    torch.cuda.empty_cache()
+    gc.collect()
+    return gr.update(value=urls, choices=urls), gr.update(value=md)

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+huggingface-hub
+gdown
+safetensors
+torch
+transformers==4.44.0
+bitsandbytes
+peft
+accelerate

utils.py ADDED Viewed

	@@ -0,0 +1,197 @@

+import gradio as gr
+from huggingface_hub import HfApi, HfFolder, hf_hub_download, snapshot_download
+import os
+from pathlib import Path
+import shutil
+import gc
+import re
+import urllib.parse
+def get_token():
+    try:
+        token = HfFolder.get_token()
+    except Exception:
+        token = ""
+    return token
+def set_token(token):
+    try:
+        HfFolder.save_token(token)
+    except Exception:
+        print(f"Error: Failed to save token.")
+def get_user_agent():
+    """Return a fixed browser-style User-Agent string (Firefox 127 on Windows 10)."""
+    return 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:127.0) Gecko/20100101 Firefox/127.0'
+def is_repo_exists(repo_id: str, repo_type: str="model"):
+    hf_token = get_token()
+    api = HfApi(token=hf_token)
+    try:
+        if api.repo_exists(repo_id=repo_id, repo_type=repo_type, token=hf_token): return True
+        else: return False
+    except Exception as e:
+        print(f"Error: Failed to connect {repo_id} ({repo_type}). {e}")
+        return True # for safe
+MODEL_TYPE_CLASS = {
+    "diffusers:StableDiffusionPipeline": "SD 1.5",
+    "diffusers:StableDiffusionXLPipeline": "SDXL",
+    "diffusers:FluxPipeline": "FLUX",
+}
+def get_model_type(repo_id: str):
+    hf_token = get_token()
+    api = HfApi(token=hf_token)
+    lora_filename = "pytorch_lora_weights.safetensors"
+    diffusers_filename = "model_index.json"
+    default = "SDXL"
+    try:
+        if api.file_exists(repo_id=repo_id, filename=lora_filename, token=hf_token): return "LoRA"
+        if not api.file_exists(repo_id=repo_id, filename=diffusers_filename, token=hf_token): return "None"
+        model = api.model_info(repo_id=repo_id, token=hf_token)
+        tags = model.tags
+        for tag in tags:
+            if tag in MODEL_TYPE_CLASS.keys(): return MODEL_TYPE_CLASS.get(tag, default)
+    except Exception:
+        return default
+    return default
+def list_uniq(l):
+    return sorted(set(l), key=l.index)
+def list_sub(a, b):
+    return [e for e in a if e not in b]
+def is_repo_name(s):
+    return re.fullmatch(r'^[^/,\s\"\']+/[^/,\s\"\']+$', s)
+def split_hf_url(url: str):
+    try:
+        s = list(re.findall(r'^(?:https?://huggingface.co/)(?:(datasets)/)?(.+?/.+?)/\w+?/.+?/(?:(.+)/)?(.+?.\w+)(?:\?download=true)?$', url)[0])
+        if len(s) < 4: return "", "", "", ""
+        repo_id = s[1]
+        repo_type = "dataset" if s[0] == "datasets" else "model"
+        subfolder = urllib.parse.unquote(s[2]) if s[2] else None
+        filename = urllib.parse.unquote(s[3])
+        return repo_id, filename, subfolder, repo_type
+    except Exception as e:
+        print(e)
+def download_hf_file(directory, url, progress=gr.Progress(track_tqdm=True)):
+    hf_token = get_token()
+    repo_id, filename, subfolder, repo_type = split_hf_url(url)
+    try:
+        if subfolder is not None: hf_hub_download(repo_id=repo_id, filename=filename, subfolder=subfolder, repo_type=repo_type, local_dir=directory, token=hf_token)
+        else: hf_hub_download(repo_id=repo_id, filename=filename, repo_type=repo_type, local_dir=directory, token=hf_token)
+    except Exception as e:
+        print(f"Failed to download: {e}")
+def download_thing(directory, url, civitai_api_key="", progress=gr.Progress(track_tqdm=True)): # requires aria2, gdown
+    hf_token = get_token()
+    url = url.strip()
+    if "drive.google.com" in url:
+        original_dir = os.getcwd()
+        os.chdir(directory)
+        os.system(f"gdown --fuzzy {url}")
+        os.chdir(original_dir)
+    elif "huggingface.co" in url:
+        url = url.replace("?download=true", "")
+        if "/blob/" in url:
+            url = url.replace("/blob/", "/resolve/")
+        #user_header = f'"Authorization: Bearer {hf_token}"'
+        if True or hf_token:
+            download_hf_file(directory, url)
+            #os.system(f"aria2c --console-log-level=error --summary-interval=10 --header={user_header} -c -x 16 -k 1M -s 16 {url} -d {directory}  -o {url.split('/')[-1]}")
+        else:
+            os.system(f"aria2c --optimize-concurrent-downloads --console-log-level=error --summary-interval=10 -c -x 16 -k 1M -s 16 {url} -d {directory}  -o {url.split('/')[-1]}")
+    elif "civitai.com" in url:
+        if "?" in url:
+            url = url.split("?")[0]
+        if civitai_api_key:
+            url = url + f"?token={civitai_api_key}"
+            os.system(f"aria2c --console-log-level=error --summary-interval=10 -c -x 16 -k 1M -s 16 -d {directory} {url}")
+        else:
+            print("You need an API key to download Civitai models.")
+    else:
+        os.system(f"aria2c --console-log-level=error --summary-interval=10 -c -x 16 -k 1M -s 16 -d {directory} {url}")
+def get_local_model_list(dir_path):
+    model_list = []
+    valid_extensions = ('.safetensors', '.fp16.safetensors', '.sft')
+    for file in Path(dir_path).glob("**/*.*"):
+        if file.is_file() and file.suffix in valid_extensions:
+            file_path = str(file)
+            model_list.append(file_path)
+    return model_list
+def get_download_file(temp_dir, url, civitai_key, progress=gr.Progress(track_tqdm=True)):
+    """Resolve `url` to something usable locally, downloading it if necessary.
+
+    Returns the input unchanged when it is a repo ID or an existing local path,
+    the path under `temp_dir` when a file with the same name is already there,
+    or the path of the newly downloaded model file. Returns "" on failure.
+    """
+    # Case 1: looks like "owner/name" and is not a local path -> treat as a Hub repo ID.
+    if not "http" in url and is_repo_name(url) and not Path(url).exists():
+        print(f"Use HF Repo: {url}")
+        new_file = url
+    # Case 2: an existing local path.
+    elif not "http" in url and Path(url).exists():
+        print(f"Use local file: {url}")
+        new_file = url
+    # Case 3: a file named like the URL's last segment already exists in temp_dir.
+    elif Path(f"{temp_dir}/{url.split('/')[-1]}").exists():
+        print(f"File to download alreday exists: {url}")
+        new_file = f"{temp_dir}/{url.split('/')[-1]}"
+    # Case 4: download, then find the new file by diffing the model list before and after.
+    else:
+        print(f"Start downloading: {url}")
+        before = get_local_model_list(temp_dir)
+        try:
+            download_thing(temp_dir, url.strip(), civitai_key)
+        except Exception:
+            print(f"Download failed: {url}")
+            return ""
+        after = get_local_model_list(temp_dir)
+        new_file = list_sub(after, before)[0] if list_sub(after, before) else ""
+    if not new_file:
+        print(f"Download failed: {url}")
+        return ""
+    print(f"Download completed: {url}")
+    return new_file
+# https://huggingface.co/docs/huggingface_hub/v0.25.1/en/package_reference/file_download#huggingface_hub.snapshot_download
+def download_repo(repo_id, dir_path, progress=gr.Progress(track_tqdm=True)):
+    """Download the weight files of `repo_id` into `dir_path`.
+
+    Only `*.safetensors` and `*.bin` files are requested, minus anything
+    matching the ignore patterns. Returns True on success; on failure prints
+    the error, shows a Gradio warning, and returns False.
+    """
+    hf_token = get_token()
+    try:
+        # force_download=True is passed, so files are requested again on every call.
+        snapshot_download(repo_id=repo_id, local_dir=dir_path, token=hf_token, allow_patterns=["*.safetensors", "*.bin"],
+                          ignore_patterns=["*.fp16.*", "/*.safetensors", "/*.bin"], force_download=True)
+        return True
+    except Exception as e:
+        print(f"Error: Failed to download {repo_id}. {e}")
+        gr.Warning(f"Error: Failed to download {repo_id}. {e}")
+        return False
+def upload_repo(new_repo_id, dir_path, is_private, progress=gr.Progress(track_tqdm=True)):
+    hf_token = get_token()
+    api = HfApi(token=hf_token)
+    try:
+        progress(0, desc="Start uploading...")
+        api.create_repo(repo_id=new_repo_id, token=hf_token, private=is_private, exist_ok=True)
+        for path in Path(dir_path).glob("*"):
+            if path.is_dir():
+                api.upload_folder(repo_id=new_repo_id, folder_path=str(path), path_in_repo=path.name, token=hf_token)
+            elif path.is_file():
+                api.upload_file(repo_id=new_repo_id, path_or_fileobj=str(path), path_in_repo=path.name, token=hf_token)
+        progress(1, desc="Uploaded.")
+        url = f"https://huggingface.co/{new_repo_id}"
+    except Exception as e:
+        print(f"Error: Failed to upload to {new_repo_id}. {e}")
+        return ""
+    return url