Add files using upload-large-folder tool

Browse files

Files changed (14) hide show

.gitattributes +1 -0
README.md +478 -0
config.json +49 -0
model-00001-of-00004.safetensors +3 -0
model-00002-of-00004.safetensors +3 -0
model-00003-of-00004.safetensors +3 -0
model-00004-of-00004.safetensors +3 -0
model.safetensors.index.json +1210 -0
pyproject.toml +30 -0
quant_log.csv +281 -0
quantize_config.json +21 -0
special_tokens_map.json +1025 -0
tokenizer.json +3 -0
tokenizer_config.json +0 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,478 @@

+---
+tags:
+- gptq
+- quantization
+- 4bit
+- confidentialmind
+- text-generation
+- apache2.0
+- mistral-small-24b
+---
+# 🔥 Quantized Model: Mistral-Small-24B-Instruct-2501_gptq_g128_4bit 🔥
+This is a 4-bit quantized version of [mistralai/Mistral-Small-24B-Instruct-2501](https://huggingface.co/mistralai/Mistral-Small-24B-Instruct-2501) model, quantized by [ConfidentialMind.com](https://www.confidentialmind.com) 🤖✨
+It uses the open-source GPTQModel library to quantize the weights to 4-bit precision with a group size of 128, resulting in a
+smaller, faster model with minimal performance degradation.
+Quantization was run on a single NVIDIA A100 GPU with 80 GB of VRAM.
+**Note:** the quantization `batch_size` is set quite high as the model is small; you may need to lower it to fit your GPU's VRAM.
+## Model Details
+- **Original Model:** [mistralai/Mistral-Small-24B-Instruct-2501](https://huggingface.co/mistralai/Mistral-Small-24B-Instruct-2501)
+- **Quantized Model:** Mistral-Small-24B-Instruct-2501_gptq_g128_4bit (this repository)
+- **Quantization Method:** GPTQ (4-bit, group size 128)
+- **Quantization Library:** [GPTQModel](https://github.com/ModelCloud/GPTQModel/tree/main)
+- **Calibration Dataset:** neuralmagic/LLM_compression_calibration (using 512 samples with seq len 4096)
+- **Quantized by:** [ConfidentialMind.com](https://www.confidentialmind.com)
+## Usage
+```python
+from gptqmodel import GPTQModel
+from transformers import AutoTokenizer
+# Hub repository id of this model; a local directory holding the downloaded files works as well.
+quantized_model_id = "JustJaro/Mistral-Small-24B-Instruct-2501_gptq_g128_4bit"
+tokenizer = AutoTokenizer.from_pretrained(quantized_model_id)
+model = GPTQModel.load(quantized_model_id, device="cuda:0")  # or "cpu"
+input_text = "This is a test prompt"
+# Move the tokenized inputs to the same device the model was loaded on.
+inputs = tokenizer(input_text, return_tensors="pt").to("cuda:0")
+outputs = model.generate(**inputs)
+print(tokenizer.decode(outputs[0], skip_special_tokens=True))
+```
+## Package Versions and Installation Instructions
+See pyproject.toml for the exact UV project file.
+```bash
+pip install \
+  gptqmodel==1.9.0 \
+  typer==0.15.1 \
+  huggingface_hub \
+  datasets==3.3.0 \
+  transformers==4.48.3 \
+  safetensors==0.5.2 \
+  torch==2.6.0
+```
+Alternatively, use the provided pyproject.toml:
+```bash
+uv venv
+source .venv/bin/activate
+uv sync
+```
+### Environment Variables
+```bash
+HF_TOKEN=<YOUR_HF_TOKEN>
+TOKENIZERS_PARALLELISM="true"
+PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
+```
+## Quantization Script
+Below is the exact quantize.py script used to generate this model (with the exact versions of the dependencies):
+<details><summary>Show Quantization Script</summary>
+```python
+#!/usr/bin/env python3
+"""
+This script loads a source Hugging Face model and a calibration dataset,
+quantizes the model using GPTQModel (with 4-bit precision and group size 128),
+saves the quantized model using the Transformers API with safetensors (safe serialization)
+under ~/models/quantized/, and then creates/updates a Hugging Face repository (with the
+_gptq_g128_4bit suffix) by uploading the model, tokenizer, and an auto-generated README.md.
+Usage example:
+    python quantize.py --source-model TinyLlama/TinyLlama-1.1B-Chat-v1.0 \
+                       --calibration-dataset wikitext/wikitext-2-raw-v1 \
+                       --seq-len 1024 --nsamples 256 --hf-token <YOUR_HF_TOKEN>
+"""
+import os
+import shutil
+import subprocess
+from pathlib import Path
+from typing import List
+import torch
+import typer
+from datasets import load_dataset
+from dotenv import load_dotenv, find_dotenv
+from gptqmodel import GPTQModel, QuantizeConfig
+from gptqmodel.utils import Perplexity
+# For later pushing to the model hub
+from huggingface_hub import HfApi
+from transformers import AutoTokenizer, PreTrainedTokenizerBase
+# Load environment variables from the .env file located by find_dotenv(), if any.
+load_dotenv(find_dotenv())
+# Hugging Face token read from the environment; used as the default for --hf-token.
+HF_TOKEN = os.getenv("HF_TOKEN")
+# Typer application object; `main` is registered on it as a command.
+app = typer.Typer()
+def get_text_from_example(example: dict) -> str:
+    """
+    Extract the calibration text from a dataset example.
+
+    A non-empty "text" field is returned unchanged. Otherwise the "content" values of the
+    entries in "messages" are stripped and joined with single spaces; entries whose content
+    is missing, None, not a string, or blank are skipped.
+
+    Args:
+        example: A dataset row, expected to carry a "text" and/or a "messages" key.
+
+    Returns:
+        The extracted text, or "" when the example provides neither usable field.
+    """
+    text = example.get("text")
+    if text:
+        return text
+    # `or []` also covers rows where "messages" is present but null.
+    messages = example.get("messages") or []
+    parts = []
+    for msg in messages:
+        content = msg.get("content")
+        # A key that exists with a None (or non-string) value must not reach .strip().
+        if isinstance(content, str) and content.strip():
+            parts.append(content.strip())
+    return " ".join(parts)
+def get_calibration_dataset(
+    tokenizer: PreTrainedTokenizerBase,
+    nsamples: int,
+    seqlen: int,
+    calibration_dataset: str
+    ) -> List[dict]:
+    """
+    Loads a calibration dataset from the Hugging Face Hub (or from a local file) and tokenizes it.
+    It accepts datasets with a single "text" field (like wikitext)
+    or with a "messages" field (as in the Neural Magic LLM Compression Calibration dataset).
+    Only examples whose extracted text is at least 80% of 'seqlen' long are kept; note that this
+    length is measured in characters, while 'seqlen' is also used as the token limit below.
+    The first 'nsamples' remaining examples are tokenized (with truncation up to 'seqlen' tokens),
+    and each is returned as a dict of tensors.
+    Returns an empty list when the dataset cannot be loaded by any of the attempted routes.
+    """
+    ds = None
+    try:
+        # Attempt to load from HF Hub.
+        try:
+            # First attempt: read "a/b" as dataset "a" with configuration "b".
+            if "/" in calibration_dataset:
+                parts = calibration_dataset.split("/", 1)
+                ds = load_dataset(parts[0], parts[1], split="train")
+            else:
+                ds = load_dataset(calibration_dataset, split="train")
+        except Exception as e:
+            print(f"Error loading dataset '{calibration_dataset}' via load_dataset: {e}")
+            # Second attempt: pass the whole string as a single dataset identifier.
+            # If this raises too, the outer handler below takes over.
+            ds = load_dataset(calibration_dataset, split="train")
+            print(f"Loaded calibration dataset from full remote path {calibration_dataset}.")
+    except Exception as e:
+        print(f"Error loading dataset '{calibration_dataset}' via load_dataset: {e}")
+        # Fallback: if the supplied calibration_dataset is a local path, try to load it as JSON-lines.
+        if os.path.exists(calibration_dataset):
+            try:
+                ds = load_dataset("json", data_files=calibration_dataset, split="train")
+                print(f"Loaded calibration dataset from local file {calibration_dataset}.")
+            except Exception as e2:
+                print(f"Error loading local json dataset from '{calibration_dataset}': {e2}")
+                return []
+        else:
+            return []
+    print(f"Dataset features: {ds.features}")
+    # Filter examples that have at least 80% 'seqlen' of extracted text.
+    # The comparison uses the character count of the text, not its token count.
+    ds = ds.filter(lambda x: len(get_text_from_example(x)) >= int(seqlen*0.8))
+    # Never index past the end when fewer than 'nsamples' examples survive the filter.
+    sample_range = min(nsamples, len(ds))
+    calibration_data = []
+    for i in range(sample_range):
+        example = ds[i]
+        text = get_text_from_example(example)
+        tokenized = tokenizer(text, truncation=True, max_length=seqlen, return_tensors="pt")
+        # Drop dimension 0 of every returned tensor (presumably the size-1 batch axis - TODO confirm).
+        tokenized = {k: v.squeeze(0) for k, v in tokenized.items()}
+        calibration_data.append(tokenized)
+    return calibration_data
+def calculate_avg_ppl(model, tokenizer):
+    """
+    Return the arithmetic mean of the perplexity values that GPTQModel's Perplexity utility
+    reports for the train split of wikitext-2-raw-v1.
+    """
+    evaluator = Perplexity(
+        model=model, tokenizer=tokenizer,
+        dataset_path="wikitext", dataset_name="wikitext-2-raw-v1",
+        split="train", text_column="text",
+    )
+    scores = evaluator.calculate(n_ctx=512, n_batch=512)
+    return sum(scores) / len(scores)
+def get_pinned_package_versions():
+    """
+    Retrieves pinned package versions using 'uv pip freeze'.
+
+    Returns:
+        A dictionary mapping lowercased package names to their versions. Each package is
+        stored under the name exactly as printed by the freeze output and, additionally,
+        under an alias with "-" and "." replaced by "_", so that a lookup such as
+        'huggingface_hub' succeeds when the freeze output spells it 'huggingface-hub'.
+        An empty dictionary is returned when 'uv' is missing or exits with an error.
+    """
+    try:
+        result = subprocess.run(["uv", "pip", "freeze"], capture_output=True, text=True, check=True)
+    except subprocess.CalledProcessError as e:
+        typer.echo(f"Error running 'uv pip freeze': {e}", err=True)
+        return {}
+    except FileNotFoundError:
+        typer.echo("uv command not found. Make sure uv is installed and in your PATH.", err=True)
+        return {}
+    versions = {}
+    for line in result.stdout.splitlines():
+        # Only "name==version" lines carry a pin; editable/URL entries are skipped.
+        package_name, separator, package_version = line.strip().partition("==")
+        if not separator:
+            continue
+        package_name = package_name.strip().lower()
+        package_version = package_version.strip()
+        versions[package_name] = package_version
+        # Import-style alias so callers may look packages up with underscores.
+        versions[package_name.replace("-", "_").replace(".", "_")] = package_version
+    return versions
+@app.command()
+def main(
+    seq_len: int = typer.Option(4096, help="Sequence length for tokenization and calibration."),
+    nsamples: int = typer.Option(512, help="Number of samples to use for calibration."),
+    source_model: str = typer.Option("mistralai/Mistral-Small-24B-Instruct-2501",
+                                     help="Source model HF repository identifier."),
+    calibration_dataset: str = typer.Option("wikitext/wikitext-2-raw-v1",
+                                              help="Calibration dataset identifier (in 'dataset/config' format) or local file path."),
+    hf_token: str = typer.Option(HF_TOKEN,
+                                 help="Hugging Face token for creating/updating your repo."),
+):
+    """
+    Quantize a model to 4-bit GPTQ, measure its perplexity, and upload it to the Hugging Face Hub.
+
+    Steps: quantize `source_model` with GPTQModel (skipped when a saved quantized model is
+    already present under ~/models/quantized/), reload the saved model, compute its average
+    perplexity, generate a README.md, push everything to the Hub, and run a short demo generation.
+    Exits with code 1 when no calibration data could be loaded.
+    """
+    # Prepare destination directory and model names.
+    model_name = source_model.split("/")[-1]
+    quantized_model_name = f"{model_name}_gptq_g128_4bit"
+    quantized_model_dir = os.path.expanduser(os.path.join("~/models/quantized", quantized_model_name))
+    device = "cuda:0" if torch.cuda.is_available() else "cpu"
+    typer.echo("Loading tokenizer from source model...")
+    tokenizer_obj = AutoTokenizer.from_pretrained(source_model, use_fast=True)
+    # Decide on the saved config rather than on the bare directory: the directory is created
+    # before quantization starts, so a crashed run would otherwise make every later run skip
+    # quantization and fail while loading an empty directory.
+    if not os.path.isfile(os.path.join(quantized_model_dir, "config.json")):
+        os.makedirs(quantized_model_dir, exist_ok=True)
+        typer.echo("Loading calibration dataset...")
+        typer.echo(f"Calibration dataset: {calibration_dataset}")
+        calibration_data = get_calibration_dataset(tokenizer_obj, nsamples, seq_len, calibration_dataset)
+        if not calibration_data:
+            typer.echo("Calibration dataset is empty. Aborting.", err=True)
+            raise typer.Exit(code=1)
+        quantize_config = QuantizeConfig(bits=4, group_size=128, mse=0.01, damp_percent=0.015)
+        typer.echo(f"Loading model in {device} mode...")
+        model = GPTQModel.load(source_model, quantize_config)
+        typer.echo("Quantizing model...")
+        # 10% of the sample count, but never 0 (which int() yields for nsamples < 10).
+        model.quantize(calibration_data, auto_gc=False, batch_size=max(1, int(nsamples*0.1)))
+        typer.echo(f"Saving quantized model to {quantized_model_dir} using Transformers safe serialization...")
+        try:
+            model.save_pretrained(quantized_model_dir)
+            tokenizer_obj.save_pretrained(quantized_model_dir)
+        except Exception as ex:
+            typer.echo(f"Error during saving with safe_serialization: {ex}. Aborting.", err=True)
+            raise
+        typer.echo(f"Quantized model saved to {quantized_model_dir}")
+    else:
+        typer.echo(f"Found existing quantized model in {quantized_model_dir}; skipping quantization.")
+    # Gather README inputs only after the model is safely on disk, so that a Hub/API
+    # failure here cannot discard a finished quantization run.
+    package_versions = get_pinned_package_versions()
+    username = get_my_user(hf_token)
+    script_content = self_read_script()
+    # Evaluate the model as it was saved, not the in-memory object used for quantization.
+    model = GPTQModel.load(quantized_model_dir, device=device)
+    avg_ppl = calculate_avg_ppl(model, tokenizer_obj)
+    typer.echo(f"Average perplexity (PPL) on wikitext v2 dataset: {avg_ppl}")
+    deps = Path("./pyproject.toml")
+    shutil.copy(deps, quantized_model_dir)
+    generate_readme(calibration_dataset, nsamples, package_versions, quantized_model_dir,
+                    quantized_model_name, script_content, seq_len, source_model, username, avg_ppl)
+    # Use the token given on the command line; it defaults to HF_TOKEN from the environment.
+    GPTQModel.push_to_hub(quantized_path=quantized_model_dir, private=False, repo_id=quantized_model_name,
+                          token=hf_token)
+    typer.echo(f"Model uploaded to Hugging Face repo: {quantized_model_name}")
+    demo_input = tokenizer_obj("test is", return_tensors="pt").to(device)
+    generated_ids = model.generate(**demo_input)
+    output_text = tokenizer_obj.decode(generated_ids[0])
+    typer.echo(f"Inference demo output: {output_text}")
+    # The perplexity is measured on wikitext (see calculate_avg_ppl), not on the calibration data.
+    typer.echo(f"Average perplexity (PPL) on wikitext v2 dataset: {avg_ppl}")
+def self_read_script():
+    """
+    Return the source text of this script so it can be embedded in the generated README.
+
+    Best effort: on any failure the returned string is an error description instead of
+    the source, so README generation can still proceed.
+    """
+    try:
+        script_path = os.path.abspath(__file__)
+        # The script contains non-ASCII characters (emoji in the README template), so read it
+        # as UTF-8 explicitly instead of relying on the platform's default encoding.
+        with open(script_path, "r", encoding="utf-8") as f:
+            script_content = f.read()
+    except Exception as e:
+        script_content = "Error reading script content: " + str(e)
+    return script_content
+def get_my_user(hf_token):
+    """
+    Return the Hugging Face username that belongs to `hf_token`.
+
+    The name is taken from the "name" (or, failing that, "username") entry of the whoami
+    response. When the request fails or yields no name, a warning is printed and the
+    hard-coded default "JustJaro" is returned.
+    """
+    username = None
+    try:
+        # Keep the API call inside the try block: it is the statement that can actually fail
+        # (bad token, no network), and the fallback below is meant to cover exactly that.
+        user_info = HfApi(token=hf_token).whoami()
+        username = user_info.get("name") or user_info.get("username")
+    except Exception as e:
+        typer.echo(f"Error retrieving username from Hugging Face API: {e}. Using default username.", err=True)
+    if not username:
+        typer.echo("Could not determine your Hugging Face username from the token, defaulting to hard coded username.",
+                   err=True)
+        username = "JustJaro"
+    return username
+def generate_readme(calibration_dataset, nsamples, package_versions, quantized_model_dir,
+                    quantized_model_name, script_content, seq_len, source_model, username, avg_ppl):
+    """
+    Write the model card (README.md) for the quantized model into `quantized_model_dir`.
+
+    Args:
+        calibration_dataset: Identifier of the calibration dataset, shown in the model details.
+        nsamples: Number of calibration samples, shown in the model details.
+        package_versions: Mapping of package name to pinned version for the install command.
+        quantized_model_dir: Directory that receives README.md; also shown in the usage snippet.
+        quantized_model_name: Name of the quantized model / repository.
+        script_content: Source of the quantization script, embedded verbatim.
+        seq_len: Calibration sequence length, shown in the model details.
+        source_model: Hub identifier of the original model.
+        username: Hub user name used to build the repository id in the usage snippet.
+        avg_ppl: Average perplexity reported in the performance section.
+    """
+    import re
+
+    def requirement(package):
+        # Freeze output may spell a name with "-" or "_"; accept either spelling.
+        version = package_versions.get(package) or package_versions.get(package.replace("_", "-"))
+        # Leave unknown versions unpinned: a literal "<version>" placeholder would be parsed
+        # by the shell as an input redirection and break the pip command.
+        return f"{package}=={version}" if version else package
+
+    # The embedded script contains Markdown code fences of its own, so it must be wrapped in
+    # a strictly longer fence; otherwise its first inner fence would end the block early.
+    longest_run = max((len(run) for run in re.findall(r"`+", script_content)), default=0)
+    script_fence = "`" * max(3, longest_run + 1)
+    readme_content = f"""---
+tags:
+- gptq
+- quantization
+- 4bit
+- confidentialmind
+- text-generation
+- apache2.0
+- mistral-small-24b
+---
+# 🔥 Quantized Model: {quantized_model_name} 🔥
+This is a 4-bit quantized version of [{source_model}](https://huggingface.co/{source_model}) model, quantized by [ConfidentialMind.com](https://www.confidentialmind.com) 🤖✨
+It leverages the open-source GPTQModel quantization to achieve 4-bit precision with a group size of 128 resulting in a
+smaller,
+faster model with minimal performance degradation.
+Ran on a single NVIDIA A100 GPU with 80GB of VRAM.
+*Note* `batch_size` is set quite high as the model is small, you may need to adjust this to your GPU VRAM.
+## Model Details
+- **Original Model:** [{source_model}](https://huggingface.co/{source_model})
+- **Quantized Model:** {quantized_model_name} (this repository)
+- **Quantization Method:** GPTQ (4-bit, group size 128)
+- **Quantization Library:** [GPTQModel](https://github.com/ModelCloud/GPTQModel/tree/main)
+- **Calibration Dataset:** {calibration_dataset} (using {nsamples} samples with seq len {seq_len})
+- **Quantized by:** [ConfidentialMind.com](https://www.confidentialmind.com)
+## Usage
+```python
+from gptqmodel import GPTQModel
+from transformers import AutoTokenizer
+# Use the local directory or {username}/{quantized_model_name} after upload
+quantized_model_id = "{quantized_model_dir}"  # or "{username}/{quantized_model_name}"
+tokenizer = AutoTokenizer.from_pretrained(quantized_model_id)
+model = GPTQModel.load(quantized_model_id, device="cuda:0")  # or "cpu"
+input_text = "This is a test prompt"
+inputs = tokenizer(input_text, return_tensors="pt").to("cuda:0")
+outputs = model.generate(**inputs)
+print(tokenizer.decode(outputs[0], skip_special_tokens=True))
+```
+## Package Versions and Installation Instructions
+See pyproject.toml for the exact UV project file.
+```bash
+pip install \\
+  {requirement('gptqmodel')} \\
+  {requirement('typer')} \\
+  {requirement('huggingface_hub')} \\
+  {requirement('datasets')} \\
+  {requirement('transformers')} \\
+  {requirement('safetensors')} \\
+  {requirement('torch')}
+```
+Alternatively, use the provided pyproject.toml:
+```bash
+uv venv
+source .venv/bin/activate
+uv sync
+```
+### Environment Variables
+```bash
+HF_TOKEN=<YOUR_HF_TOKEN>
+TOKENIZERS_PARALLELISM="true"
+PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
+```
+## Quantization Script
+Below is the exact quantize.py script used to generate this model (with the exact versions of the dependencies):
+<details><summary>Show Quantization Script</summary>
+{script_fence}python
+{script_content}
+{script_fence}
+</details>
+## Quantization Performance
+Average perplexity (PPL) on wikitext v2 dataset: {avg_ppl}
+## Disclaimer
+This model is for research purposes only. It may inherit limitations and biases from the original model and the quantization process. Please use responsibly and refer to the original model card for more details.
+## Contact
+For any questions or support, please visit [ConfidentialMind.com](https://www.confidentialmind.com) or contact us directly.
+## License
+This model inherits the license from the original model. Please refer to the original model card for more details.
+Original model card: {source_model}
+## Attribution
+This model was quantized by [Jaro](https://www.linkedin.com/in/jaroai/)
+## Acknowledgements
+Quantization performed using the GPTQModel pipeline.
+TODO: Add `gptqmodel.utils.eval` integration and auto-generation of eval table.
+---
+*Generated and quantized using GPTQModel.*
+"""
+    readme_path = os.path.join(quantized_model_dir, "README.md")
+    # The card contains emoji; write UTF-8 explicitly so the platform default cannot break it.
+    with open(readme_path, "w", encoding="utf-8") as f:
+        f.write(readme_content)
+    typer.echo("README.md created with detailed information.")
+# Run the Typer CLI only when the file is executed as a script, not when it is imported.
+if __name__ == "__main__":
+    app()
+```
+</details>
+## Quantization Performance
+Average perplexity (PPL) on wikitext v2 dataset: 23.63232087314638
+## Disclaimer
+This model is for research purposes only. It may inherit limitations and biases from the original model and the quantization process. Please use responsibly and refer to the original model card for more details.
+## Contact
+For any questions or support, please visit [ConfidentialMind.com](https://www.confidentialmind.com) or contact us directly.
+## License
+This model inherits the license from the original model. Please refer to the original model card for more details.
+Original model card: mistralai/Mistral-Small-24B-Instruct-2501
+## Attribution
+This model was quantized by [Jaro](https://www.linkedin.com/in/jaroai/)
+## Acknowledgements
+Quantization performed using the GPTQModel pipeline.
+TODO: Add `gptqmodel.utils.eval` integration and auto-generation of eval table.
+---
+*Generated and quantized using GPTQModel.*

config.json ADDED Viewed

	@@ -0,0 +1,49 @@

+{
+  "_attn_implementation_autoset": true,
+  "_name_or_path": "/home/jaro/.cache/huggingface/hub/models--mistralai--Mistral-Small-24B-Instruct-2501/snapshots/20b2ed1c4e9af44b9ad125f79f713301e27737e2",
+  "architectures": [
+    "MistralForCausalLM"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 5120,
+  "initializer_range": 0.02,
+  "intermediate_size": 32768,
+  "max_position_embeddings": 32768,
+  "model_type": "mistral",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 40,
+  "num_key_value_heads": 8,
+  "quantization_config": {
+    "bits": 4,
+    "checkpoint_format": "gptq",
+    "desc_act": true,
+    "group_size": 128,
+    "lm_head": false,
+    "meta": {
+      "damp_auto_increment": 0.0025,
+      "damp_percent": 0.015,
+      "mse": 0.01,
+      "quantizer": [
+        "gptqmodel:1.9.0"
+      ],
+      "static_groups": false,
+      "true_sequential": true,
+      "uri": "https://github.com/modelcloud/gptqmodel"
+    },
+    "pack_dtype": "int32",
+    "quant_method": "gptq",
+    "sym": true
+  },
+  "rms_norm_eps": 1e-05,
+  "rope_theta": 100000000.0,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.48.3",
+  "use_cache": true,
+  "vocab_size": 131072
+}

model-00001-of-00004.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a7f3b68d95281175014f2c2f1b38e92f524d77ccbc80e6ff6109df8c406bea2d
+size 3970502008

model-00002-of-00004.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d18570a86fb2074c57737ed9f745a686573cdf6864f97e61f2992dd166a48d09
+size 3931377544

model-00003-of-00004.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c43dcb640596ae407a2be24afbb87b15aed485f68288cc11b529d501624369da
+size 3958826928

model-00004-of-00004.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:44cb3a456e5d474b34d6cc243de912952bbcb8ef04e0a9f7343191b9d15db8ac
+size 2383688840

model.safetensors.index.json ADDED Viewed

	@@ -0,0 +1,1210 @@

+{
+  "metadata": {
+    "total_size": 14244259840
+  },
+  "weight_map": {
+    "lm_head.weight": "model-00004-of-00004.safetensors",
+    "model.embed_tokens.weight": "model-00001-of-00004.safetensors",
+    "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.0.mlp.down_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.0.mlp.down_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.0.mlp.down_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.0.mlp.down_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.0.mlp.gate_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.0.mlp.gate_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.0.mlp.gate_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.0.mlp.gate_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.0.mlp.up_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.0.mlp.up_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.0.mlp.up_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.0.mlp.up_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.0.self_attn.k_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.0.self_attn.k_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.0.self_attn.k_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.0.self_attn.k_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.0.self_attn.o_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.0.self_attn.o_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.0.self_attn.o_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.0.self_attn.o_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.0.self_attn.q_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.0.self_attn.q_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.0.self_attn.q_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.0.self_attn.q_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.0.self_attn.v_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.0.self_attn.v_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.0.self_attn.v_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.0.self_attn.v_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.1.mlp.down_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.1.mlp.down_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.1.mlp.down_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.1.mlp.down_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.1.mlp.gate_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.1.mlp.gate_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.1.mlp.gate_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.1.mlp.gate_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.1.mlp.up_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.1.mlp.up_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.1.mlp.up_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.1.mlp.up_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.1.self_attn.k_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.1.self_attn.k_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.1.self_attn.k_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.1.self_attn.k_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.1.self_attn.o_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.1.self_attn.o_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.1.self_attn.o_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.1.self_attn.o_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.1.self_attn.q_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.1.self_attn.q_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.1.self_attn.q_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.1.self_attn.q_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.1.self_attn.v_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.1.self_attn.v_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.1.self_attn.v_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.1.self_attn.v_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.10.mlp.down_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.10.mlp.down_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.10.mlp.down_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.10.mlp.down_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.10.mlp.gate_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.10.mlp.gate_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.10.mlp.gate_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.10.mlp.gate_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.10.mlp.up_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.10.mlp.up_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.10.mlp.up_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.10.mlp.up_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.10.self_attn.k_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.10.self_attn.k_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.10.self_attn.k_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.10.self_attn.k_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.10.self_attn.o_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.10.self_attn.o_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.10.self_attn.o_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.10.self_attn.o_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.10.self_attn.q_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.10.self_attn.q_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.10.self_attn.q_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.10.self_attn.q_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.10.self_attn.v_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.10.self_attn.v_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.10.self_attn.v_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.10.self_attn.v_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.11.mlp.down_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.11.mlp.down_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.11.mlp.down_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.11.mlp.down_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.11.mlp.gate_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.11.mlp.gate_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.11.mlp.gate_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.11.mlp.gate_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.11.mlp.up_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.11.mlp.up_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.11.mlp.up_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.11.mlp.up_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.11.self_attn.k_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.11.self_attn.k_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.11.self_attn.k_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.11.self_attn.k_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.11.self_attn.o_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.11.self_attn.o_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.11.self_attn.o_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.11.self_attn.o_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.11.self_attn.q_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.11.self_attn.q_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.11.self_attn.q_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.11.self_attn.q_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.11.self_attn.v_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.11.self_attn.v_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.11.self_attn.v_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.11.self_attn.v_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.12.mlp.down_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.12.mlp.down_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.12.mlp.down_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.12.mlp.down_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.12.mlp.gate_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.12.mlp.gate_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.12.mlp.gate_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.12.mlp.gate_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.12.mlp.up_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.12.mlp.up_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.12.mlp.up_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.12.mlp.up_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.12.self_attn.k_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.12.self_attn.k_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.12.self_attn.k_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.12.self_attn.k_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.12.self_attn.o_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.12.self_attn.o_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.12.self_attn.o_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.12.self_attn.o_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.12.self_attn.q_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.12.self_attn.q_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.12.self_attn.q_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.12.self_attn.q_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.12.self_attn.v_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.12.self_attn.v_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.12.self_attn.v_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.12.self_attn.v_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.13.mlp.down_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.13.mlp.down_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.13.mlp.down_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.13.mlp.down_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.13.mlp.gate_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.13.mlp.gate_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.13.mlp.gate_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.13.mlp.gate_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.13.mlp.up_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.13.mlp.up_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.13.mlp.up_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.13.mlp.up_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.13.self_attn.k_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.13.self_attn.k_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.13.self_attn.k_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.13.self_attn.k_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.13.self_attn.o_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.13.self_attn.o_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.13.self_attn.o_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.13.self_attn.o_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.13.self_attn.q_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.13.self_attn.q_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.13.self_attn.q_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.13.self_attn.q_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.13.self_attn.v_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.13.self_attn.v_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.13.self_attn.v_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.13.self_attn.v_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.14.mlp.down_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.14.mlp.down_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.14.mlp.down_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.14.mlp.down_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.14.mlp.gate_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.14.mlp.gate_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.14.mlp.gate_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.14.mlp.gate_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.14.mlp.up_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.14.mlp.up_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.14.mlp.up_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.14.mlp.up_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.14.self_attn.k_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.14.self_attn.k_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.14.self_attn.k_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.14.self_attn.k_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.14.self_attn.o_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.14.self_attn.o_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.14.self_attn.o_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.14.self_attn.o_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.14.self_attn.q_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.14.self_attn.q_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.14.self_attn.q_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.14.self_attn.q_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.14.self_attn.v_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.14.self_attn.v_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.14.self_attn.v_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.14.self_attn.v_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.15.mlp.down_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.15.mlp.down_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.15.mlp.down_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.15.mlp.down_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.15.mlp.gate_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.15.mlp.gate_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.15.mlp.gate_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.15.mlp.gate_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.15.mlp.up_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.15.mlp.up_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.15.mlp.up_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.15.mlp.up_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.15.self_attn.k_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.15.self_attn.k_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.15.self_attn.k_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.15.self_attn.k_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.15.self_attn.o_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.15.self_attn.o_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.15.self_attn.o_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.15.self_attn.o_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.15.self_attn.q_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.15.self_attn.q_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.15.self_attn.q_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.15.self_attn.q_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.15.self_attn.v_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.15.self_attn.v_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.15.self_attn.v_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.15.self_attn.v_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.16.mlp.down_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.16.mlp.down_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.16.mlp.down_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.16.mlp.down_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.16.mlp.gate_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.16.mlp.gate_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.16.mlp.gate_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.16.mlp.gate_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.16.mlp.up_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.16.mlp.up_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.16.mlp.up_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.16.mlp.up_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.16.self_attn.k_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.16.self_attn.k_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.16.self_attn.k_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.16.self_attn.k_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.16.self_attn.o_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.16.self_attn.o_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.16.self_attn.o_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.16.self_attn.o_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.16.self_attn.q_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.16.self_attn.q_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.16.self_attn.q_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.16.self_attn.q_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.16.self_attn.v_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.16.self_attn.v_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.16.self_attn.v_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.16.self_attn.v_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.17.mlp.down_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.17.mlp.down_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.17.mlp.down_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.17.mlp.down_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.17.mlp.gate_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.17.mlp.gate_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.17.mlp.gate_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.17.mlp.gate_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.17.mlp.up_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.17.mlp.up_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.17.mlp.up_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.17.mlp.up_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.17.self_attn.k_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.17.self_attn.k_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.17.self_attn.k_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.17.self_attn.k_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.17.self_attn.o_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.17.self_attn.o_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.17.self_attn.o_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.17.self_attn.o_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.17.self_attn.q_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.17.self_attn.q_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.17.self_attn.q_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.17.self_attn.q_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.17.self_attn.v_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.17.self_attn.v_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.17.self_attn.v_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.17.self_attn.v_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.18.mlp.down_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.18.mlp.down_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.18.mlp.down_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.18.mlp.down_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.18.mlp.gate_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.18.mlp.gate_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.18.mlp.gate_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.18.mlp.gate_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.18.mlp.up_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.18.mlp.up_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.18.mlp.up_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.18.mlp.up_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.18.self_attn.k_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.18.self_attn.k_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.18.self_attn.k_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.18.self_attn.k_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.18.self_attn.o_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.18.self_attn.o_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.18.self_attn.o_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.18.self_attn.o_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.18.self_attn.q_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.18.self_attn.q_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.18.self_attn.q_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.18.self_attn.q_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.18.self_attn.v_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.18.self_attn.v_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.18.self_attn.v_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.18.self_attn.v_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.19.mlp.down_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.19.mlp.down_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.19.mlp.down_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.19.mlp.down_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.19.mlp.gate_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.19.mlp.gate_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.19.mlp.gate_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.19.mlp.gate_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.19.mlp.up_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.19.mlp.up_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.19.mlp.up_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.19.mlp.up_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.19.self_attn.k_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.19.self_attn.k_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.19.self_attn.k_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.19.self_attn.k_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.19.self_attn.o_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.19.self_attn.o_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.19.self_attn.o_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.19.self_attn.o_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.19.self_attn.q_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.19.self_attn.q_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.19.self_attn.q_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.19.self_attn.q_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.19.self_attn.v_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.19.self_attn.v_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.19.self_attn.v_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.19.self_attn.v_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.2.mlp.down_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.2.mlp.down_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.2.mlp.down_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.2.mlp.down_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.2.mlp.gate_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.2.mlp.gate_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.2.mlp.gate_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.2.mlp.gate_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.2.mlp.up_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.2.mlp.up_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.2.mlp.up_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.2.mlp.up_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.2.self_attn.k_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.2.self_attn.k_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.2.self_attn.k_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.2.self_attn.k_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.2.self_attn.o_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.2.self_attn.o_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.2.self_attn.o_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.2.self_attn.o_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.2.self_attn.q_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.2.self_attn.q_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.2.self_attn.q_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.2.self_attn.q_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.2.self_attn.v_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.2.self_attn.v_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.2.self_attn.v_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.2.self_attn.v_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.20.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.20.mlp.down_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.20.mlp.down_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.20.mlp.down_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.20.mlp.down_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.20.mlp.gate_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.20.mlp.gate_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.20.mlp.gate_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.20.mlp.gate_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.20.mlp.up_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.20.mlp.up_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.20.mlp.up_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.20.mlp.up_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.20.self_attn.k_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.20.self_attn.k_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.20.self_attn.k_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.20.self_attn.k_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.20.self_attn.o_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.20.self_attn.o_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.20.self_attn.o_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.20.self_attn.o_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.20.self_attn.q_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.20.self_attn.q_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.20.self_attn.q_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.20.self_attn.q_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.20.self_attn.v_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.20.self_attn.v_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.20.self_attn.v_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.20.self_attn.v_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.21.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.21.mlp.down_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.21.mlp.down_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.21.mlp.down_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.21.mlp.down_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.21.mlp.gate_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.21.mlp.gate_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.21.mlp.gate_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.21.mlp.gate_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.21.mlp.up_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.21.mlp.up_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.21.mlp.up_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.21.mlp.up_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.21.self_attn.k_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.21.self_attn.k_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.21.self_attn.k_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.21.self_attn.k_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.21.self_attn.o_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.21.self_attn.o_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.21.self_attn.o_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.21.self_attn.o_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.21.self_attn.q_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.21.self_attn.q_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.21.self_attn.q_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.21.self_attn.q_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.21.self_attn.v_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.21.self_attn.v_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.21.self_attn.v_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.21.self_attn.v_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.22.mlp.down_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.22.mlp.down_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.22.mlp.down_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.22.mlp.down_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.22.mlp.gate_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.22.mlp.gate_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.22.mlp.gate_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.22.mlp.gate_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.22.mlp.up_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.22.mlp.up_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.22.mlp.up_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.22.mlp.up_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.22.self_attn.k_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.22.self_attn.k_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.22.self_attn.k_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.22.self_attn.k_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.22.self_attn.o_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.22.self_attn.o_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.22.self_attn.o_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.22.self_attn.o_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.22.self_attn.q_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.22.self_attn.q_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.22.self_attn.q_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.22.self_attn.q_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.22.self_attn.v_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.22.self_attn.v_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.22.self_attn.v_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.22.self_attn.v_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.23.mlp.down_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.23.mlp.down_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.23.mlp.down_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.23.mlp.down_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.23.mlp.gate_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.23.mlp.gate_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.23.mlp.gate_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.23.mlp.gate_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.23.mlp.up_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.23.mlp.up_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.23.mlp.up_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.23.mlp.up_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.23.self_attn.k_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.23.self_attn.k_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.23.self_attn.k_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.23.self_attn.k_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.23.self_attn.o_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.23.self_attn.o_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.23.self_attn.o_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.23.self_attn.o_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.23.self_attn.q_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.23.self_attn.q_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.23.self_attn.q_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.23.self_attn.q_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.23.self_attn.v_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.23.self_attn.v_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.23.self_attn.v_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.23.self_attn.v_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.24.mlp.down_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.24.mlp.down_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.24.mlp.down_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.24.mlp.down_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.24.mlp.gate_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.24.mlp.gate_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.24.mlp.gate_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.24.mlp.gate_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.24.mlp.up_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.24.mlp.up_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.24.mlp.up_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.24.mlp.up_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.24.self_attn.k_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.24.self_attn.k_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.24.self_attn.k_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.24.self_attn.k_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.24.self_attn.o_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.24.self_attn.o_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.24.self_attn.o_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.24.self_attn.o_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.24.self_attn.q_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.24.self_attn.q_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.24.self_attn.q_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.24.self_attn.q_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.24.self_attn.v_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.24.self_attn.v_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.24.self_attn.v_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.24.self_attn.v_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.25.mlp.down_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.25.mlp.down_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.25.mlp.down_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.25.mlp.down_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.25.mlp.gate_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.25.mlp.gate_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.25.mlp.gate_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.25.mlp.gate_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.25.mlp.up_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.25.mlp.up_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.25.mlp.up_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.25.mlp.up_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.25.self_attn.k_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.25.self_attn.k_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.25.self_attn.k_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.25.self_attn.k_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.25.self_attn.o_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.25.self_attn.o_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.25.self_attn.o_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.25.self_attn.o_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.25.self_attn.q_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.25.self_attn.q_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.25.self_attn.q_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.25.self_attn.q_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.25.self_attn.v_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.25.self_attn.v_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.25.self_attn.v_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.25.self_attn.v_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.26.mlp.down_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.26.mlp.down_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.26.mlp.down_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.26.mlp.down_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.26.mlp.gate_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.26.mlp.gate_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.26.mlp.gate_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.26.mlp.gate_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.26.mlp.up_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.26.mlp.up_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.26.mlp.up_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.26.mlp.up_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.26.self_attn.k_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.26.self_attn.k_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.26.self_attn.k_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.26.self_attn.k_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.26.self_attn.o_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.26.self_attn.o_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.26.self_attn.o_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.26.self_attn.o_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.26.self_attn.q_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.26.self_attn.q_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.26.self_attn.q_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.26.self_attn.q_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.26.self_attn.v_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.26.self_attn.v_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.26.self_attn.v_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.26.self_attn.v_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.27.mlp.down_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.27.mlp.down_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.27.mlp.down_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.27.mlp.down_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.27.mlp.gate_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.27.mlp.gate_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.27.mlp.gate_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.27.mlp.gate_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.27.mlp.up_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.27.mlp.up_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.27.mlp.up_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.27.mlp.up_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.27.self_attn.k_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.27.self_attn.k_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.27.self_attn.k_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.27.self_attn.k_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.27.self_attn.o_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.27.self_attn.o_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.27.self_attn.o_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.27.self_attn.o_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.27.self_attn.q_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.27.self_attn.q_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.27.self_attn.q_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.27.self_attn.q_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.27.self_attn.v_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.27.self_attn.v_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.27.self_attn.v_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.27.self_attn.v_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.28.mlp.down_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.28.mlp.down_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.28.mlp.down_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.28.mlp.down_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.28.mlp.gate_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.28.mlp.gate_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.28.mlp.gate_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.28.mlp.gate_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.28.mlp.up_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.28.mlp.up_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.28.mlp.up_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.28.mlp.up_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.28.self_attn.k_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.28.self_attn.k_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.28.self_attn.k_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.28.self_attn.k_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.28.self_attn.o_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.28.self_attn.o_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.28.self_attn.o_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.28.self_attn.o_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.28.self_attn.q_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.28.self_attn.q_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.28.self_attn.q_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.28.self_attn.q_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.28.self_attn.v_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.28.self_attn.v_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.28.self_attn.v_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.28.self_attn.v_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.29.mlp.down_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.29.mlp.down_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.29.mlp.down_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.29.mlp.down_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.29.mlp.gate_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.29.mlp.gate_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.29.mlp.gate_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.29.mlp.gate_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.29.mlp.up_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.29.mlp.up_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.29.mlp.up_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.29.mlp.up_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.29.self_attn.k_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.29.self_attn.k_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.29.self_attn.k_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.29.self_attn.k_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.29.self_attn.o_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.29.self_attn.o_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.29.self_attn.o_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.29.self_attn.o_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.29.self_attn.q_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.29.self_attn.q_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.29.self_attn.q_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.29.self_attn.q_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.29.self_attn.v_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.29.self_attn.v_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.29.self_attn.v_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.29.self_attn.v_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.3.mlp.down_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.3.mlp.down_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.3.mlp.down_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.3.mlp.down_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.3.mlp.gate_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.3.mlp.gate_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.3.mlp.gate_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.3.mlp.gate_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.3.mlp.up_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.3.mlp.up_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.3.mlp.up_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.3.mlp.up_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.3.self_attn.k_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.3.self_attn.k_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.3.self_attn.k_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.3.self_attn.k_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.3.self_attn.o_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.3.self_attn.o_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.3.self_attn.o_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.3.self_attn.o_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.3.self_attn.q_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.3.self_attn.q_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.3.self_attn.q_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.3.self_attn.q_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.3.self_attn.v_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.3.self_attn.v_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.3.self_attn.v_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.3.self_attn.v_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.30.mlp.down_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.30.mlp.down_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.30.mlp.down_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.30.mlp.down_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.30.mlp.gate_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.30.mlp.gate_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.30.mlp.gate_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.30.mlp.gate_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.30.mlp.up_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.30.mlp.up_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.30.mlp.up_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.30.mlp.up_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.30.self_attn.k_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.30.self_attn.k_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.30.self_attn.k_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.30.self_attn.k_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.30.self_attn.o_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.30.self_attn.o_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.30.self_attn.o_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.30.self_attn.o_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.30.self_attn.q_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.30.self_attn.q_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.30.self_attn.q_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.30.self_attn.q_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.30.self_attn.v_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.30.self_attn.v_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.30.self_attn.v_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.30.self_attn.v_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.31.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.31.mlp.down_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.31.mlp.down_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.31.mlp.down_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.31.mlp.down_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.31.mlp.gate_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.31.mlp.gate_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.31.mlp.gate_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.31.mlp.gate_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.31.mlp.up_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.31.mlp.up_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.31.mlp.up_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.31.mlp.up_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.31.self_attn.k_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.31.self_attn.k_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.31.self_attn.k_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.31.self_attn.k_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.31.self_attn.o_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.31.self_attn.o_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.31.self_attn.o_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.31.self_attn.o_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.31.self_attn.q_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.31.self_attn.q_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.31.self_attn.q_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.31.self_attn.q_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.31.self_attn.v_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.31.self_attn.v_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.31.self_attn.v_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.31.self_attn.v_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.32.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.32.mlp.down_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.32.mlp.down_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.32.mlp.down_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.32.mlp.down_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.32.mlp.gate_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.32.mlp.gate_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.32.mlp.gate_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.32.mlp.gate_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.32.mlp.up_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.32.mlp.up_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.32.mlp.up_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.32.mlp.up_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.32.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.32.self_attn.k_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.32.self_attn.k_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.32.self_attn.k_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.32.self_attn.k_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.32.self_attn.o_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.32.self_attn.o_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.32.self_attn.o_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.32.self_attn.o_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.32.self_attn.q_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.32.self_attn.q_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.32.self_attn.q_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.32.self_attn.q_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.32.self_attn.v_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.32.self_attn.v_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.32.self_attn.v_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.32.self_attn.v_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.33.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.33.mlp.down_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.33.mlp.down_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.33.mlp.down_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.33.mlp.down_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.33.mlp.gate_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.33.mlp.gate_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.33.mlp.gate_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.33.mlp.gate_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.33.mlp.up_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.33.mlp.up_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.33.mlp.up_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.33.mlp.up_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.33.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.33.self_attn.k_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.33.self_attn.k_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.33.self_attn.k_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.33.self_attn.k_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.33.self_attn.o_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.33.self_attn.o_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.33.self_attn.o_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.33.self_attn.o_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.33.self_attn.q_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.33.self_attn.q_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.33.self_attn.q_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.33.self_attn.q_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.33.self_attn.v_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.33.self_attn.v_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.33.self_attn.v_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.33.self_attn.v_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.34.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.34.mlp.down_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.34.mlp.down_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.34.mlp.down_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.34.mlp.down_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.34.mlp.gate_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.34.mlp.gate_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.34.mlp.gate_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.34.mlp.gate_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.34.mlp.up_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.34.mlp.up_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.34.mlp.up_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.34.mlp.up_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.34.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.34.self_attn.k_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.34.self_attn.k_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.34.self_attn.k_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.34.self_attn.k_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.34.self_attn.o_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.34.self_attn.o_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.34.self_attn.o_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.34.self_attn.o_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.34.self_attn.q_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.34.self_attn.q_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.34.self_attn.q_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.34.self_attn.q_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.34.self_attn.v_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.34.self_attn.v_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.34.self_attn.v_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.34.self_attn.v_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.35.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.35.mlp.down_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.35.mlp.down_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.35.mlp.down_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.35.mlp.down_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.35.mlp.gate_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.35.mlp.gate_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.35.mlp.gate_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.35.mlp.gate_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.35.mlp.up_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.35.mlp.up_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.35.mlp.up_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.35.mlp.up_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.35.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.35.self_attn.k_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.35.self_attn.k_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.35.self_attn.k_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.35.self_attn.k_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.35.self_attn.o_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.35.self_attn.o_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.35.self_attn.o_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.35.self_attn.o_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.35.self_attn.q_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.35.self_attn.q_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.35.self_attn.q_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.35.self_attn.q_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.35.self_attn.v_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.35.self_attn.v_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.35.self_attn.v_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.35.self_attn.v_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.36.input_layernorm.weight": "model-00004-of-00004.safetensors",
+    "model.layers.36.mlp.down_proj.g_idx": "model-00004-of-00004.safetensors",
+    "model.layers.36.mlp.down_proj.qweight": "model-00004-of-00004.safetensors",
+    "model.layers.36.mlp.down_proj.qzeros": "model-00004-of-00004.safetensors",
+    "model.layers.36.mlp.down_proj.scales": "model-00004-of-00004.safetensors",
+    "model.layers.36.mlp.gate_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.36.mlp.gate_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.36.mlp.gate_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.36.mlp.gate_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.36.mlp.up_proj.g_idx": "model-00004-of-00004.safetensors",
+    "model.layers.36.mlp.up_proj.qweight": "model-00004-of-00004.safetensors",
+    "model.layers.36.mlp.up_proj.qzeros": "model-00004-of-00004.safetensors",
+    "model.layers.36.mlp.up_proj.scales": "model-00004-of-00004.safetensors",
+    "model.layers.36.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
+    "model.layers.36.self_attn.k_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.36.self_attn.k_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.36.self_attn.k_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.36.self_attn.k_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.36.self_attn.o_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.36.self_attn.o_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.36.self_attn.o_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.36.self_attn.o_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.36.self_attn.q_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.36.self_attn.q_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.36.self_attn.q_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.36.self_attn.q_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.36.self_attn.v_proj.g_idx": "model-00003-of-00004.safetensors",
+    "model.layers.36.self_attn.v_proj.qweight": "model-00003-of-00004.safetensors",
+    "model.layers.36.self_attn.v_proj.qzeros": "model-00003-of-00004.safetensors",
+    "model.layers.36.self_attn.v_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.37.input_layernorm.weight": "model-00004-of-00004.safetensors",
+    "model.layers.37.mlp.down_proj.g_idx": "model-00004-of-00004.safetensors",
+    "model.layers.37.mlp.down_proj.qweight": "model-00004-of-00004.safetensors",
+    "model.layers.37.mlp.down_proj.qzeros": "model-00004-of-00004.safetensors",
+    "model.layers.37.mlp.down_proj.scales": "model-00004-of-00004.safetensors",
+    "model.layers.37.mlp.gate_proj.g_idx": "model-00004-of-00004.safetensors",
+    "model.layers.37.mlp.gate_proj.qweight": "model-00004-of-00004.safetensors",
+    "model.layers.37.mlp.gate_proj.qzeros": "model-00004-of-00004.safetensors",
+    "model.layers.37.mlp.gate_proj.scales": "model-00004-of-00004.safetensors",
+    "model.layers.37.mlp.up_proj.g_idx": "model-00004-of-00004.safetensors",
+    "model.layers.37.mlp.up_proj.qweight": "model-00004-of-00004.safetensors",
+    "model.layers.37.mlp.up_proj.qzeros": "model-00004-of-00004.safetensors",
+    "model.layers.37.mlp.up_proj.scales": "model-00004-of-00004.safetensors",
+    "model.layers.37.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
+    "model.layers.37.self_attn.k_proj.g_idx": "model-00004-of-00004.safetensors",
+    "model.layers.37.self_attn.k_proj.qweight": "model-00004-of-00004.safetensors",
+    "model.layers.37.self_attn.k_proj.qzeros": "model-00004-of-00004.safetensors",
+    "model.layers.37.self_attn.k_proj.scales": "model-00004-of-00004.safetensors",
+    "model.layers.37.self_attn.o_proj.g_idx": "model-00004-of-00004.safetensors",
+    "model.layers.37.self_attn.o_proj.qweight": "model-00004-of-00004.safetensors",
+    "model.layers.37.self_attn.o_proj.qzeros": "model-00004-of-00004.safetensors",
+    "model.layers.37.self_attn.o_proj.scales": "model-00004-of-00004.safetensors",
+    "model.layers.37.self_attn.q_proj.g_idx": "model-00004-of-00004.safetensors",
+    "model.layers.37.self_attn.q_proj.qweight": "model-00004-of-00004.safetensors",
+    "model.layers.37.self_attn.q_proj.qzeros": "model-00004-of-00004.safetensors",
+    "model.layers.37.self_attn.q_proj.scales": "model-00004-of-00004.safetensors",
+    "model.layers.37.self_attn.v_proj.g_idx": "model-00004-of-00004.safetensors",
+    "model.layers.37.self_attn.v_proj.qweight": "model-00004-of-00004.safetensors",
+    "model.layers.37.self_attn.v_proj.qzeros": "model-00004-of-00004.safetensors",
+    "model.layers.37.self_attn.v_proj.scales": "model-00004-of-00004.safetensors",
+    "model.layers.38.input_layernorm.weight": "model-00004-of-00004.safetensors",
+    "model.layers.38.mlp.down_proj.g_idx": "model-00004-of-00004.safetensors",
+    "model.layers.38.mlp.down_proj.qweight": "model-00004-of-00004.safetensors",
+    "model.layers.38.mlp.down_proj.qzeros": "model-00004-of-00004.safetensors",
+    "model.layers.38.mlp.down_proj.scales": "model-00004-of-00004.safetensors",
+    "model.layers.38.mlp.gate_proj.g_idx": "model-00004-of-00004.safetensors",
+    "model.layers.38.mlp.gate_proj.qweight": "model-00004-of-00004.safetensors",
+    "model.layers.38.mlp.gate_proj.qzeros": "model-00004-of-00004.safetensors",
+    "model.layers.38.mlp.gate_proj.scales": "model-00004-of-00004.safetensors",
+    "model.layers.38.mlp.up_proj.g_idx": "model-00004-of-00004.safetensors",
+    "model.layers.38.mlp.up_proj.qweight": "model-00004-of-00004.safetensors",
+    "model.layers.38.mlp.up_proj.qzeros": "model-00004-of-00004.safetensors",
+    "model.layers.38.mlp.up_proj.scales": "model-00004-of-00004.safetensors",
+    "model.layers.38.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
+    "model.layers.38.self_attn.k_proj.g_idx": "model-00004-of-00004.safetensors",
+    "model.layers.38.self_attn.k_proj.qweight": "model-00004-of-00004.safetensors",
+    "model.layers.38.self_attn.k_proj.qzeros": "model-00004-of-00004.safetensors",
+    "model.layers.38.self_attn.k_proj.scales": "model-00004-of-00004.safetensors",
+    "model.layers.38.self_attn.o_proj.g_idx": "model-00004-of-00004.safetensors",
+    "model.layers.38.self_attn.o_proj.qweight": "model-00004-of-00004.safetensors",
+    "model.layers.38.self_attn.o_proj.qzeros": "model-00004-of-00004.safetensors",
+    "model.layers.38.self_attn.o_proj.scales": "model-00004-of-00004.safetensors",
+    "model.layers.38.self_attn.q_proj.g_idx": "model-00004-of-00004.safetensors",
+    "model.layers.38.self_attn.q_proj.qweight": "model-00004-of-00004.safetensors",
+    "model.layers.38.self_attn.q_proj.qzeros": "model-00004-of-00004.safetensors",
+    "model.layers.38.self_attn.q_proj.scales": "model-00004-of-00004.safetensors",
+    "model.layers.38.self_attn.v_proj.g_idx": "model-00004-of-00004.safetensors",
+    "model.layers.38.self_attn.v_proj.qweight": "model-00004-of-00004.safetensors",
+    "model.layers.38.self_attn.v_proj.qzeros": "model-00004-of-00004.safetensors",
+    "model.layers.38.self_attn.v_proj.scales": "model-00004-of-00004.safetensors",
+    "model.layers.39.input_layernorm.weight": "model-00004-of-00004.safetensors",
+    "model.layers.39.mlp.down_proj.g_idx": "model-00004-of-00004.safetensors",
+    "model.layers.39.mlp.down_proj.qweight": "model-00004-of-00004.safetensors",
+    "model.layers.39.mlp.down_proj.qzeros": "model-00004-of-00004.safetensors",
+    "model.layers.39.mlp.down_proj.scales": "model-00004-of-00004.safetensors",
+    "model.layers.39.mlp.gate_proj.g_idx": "model-00004-of-00004.safetensors",
+    "model.layers.39.mlp.gate_proj.qweight": "model-00004-of-00004.safetensors",
+    "model.layers.39.mlp.gate_proj.qzeros": "model-00004-of-00004.safetensors",
+    "model.layers.39.mlp.gate_proj.scales": "model-00004-of-00004.safetensors",
+    "model.layers.39.mlp.up_proj.g_idx": "model-00004-of-00004.safetensors",
+    "model.layers.39.mlp.up_proj.qweight": "model-00004-of-00004.safetensors",
+    "model.layers.39.mlp.up_proj.qzeros": "model-00004-of-00004.safetensors",
+    "model.layers.39.mlp.up_proj.scales": "model-00004-of-00004.safetensors",
+    "model.layers.39.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
+    "model.layers.39.self_attn.k_proj.g_idx": "model-00004-of-00004.safetensors",
+    "model.layers.39.self_attn.k_proj.qweight": "model-00004-of-00004.safetensors",
+    "model.layers.39.self_attn.k_proj.qzeros": "model-00004-of-00004.safetensors",
+    "model.layers.39.self_attn.k_proj.scales": "model-00004-of-00004.safetensors",
+    "model.layers.39.self_attn.o_proj.g_idx": "model-00004-of-00004.safetensors",
+    "model.layers.39.self_attn.o_proj.qweight": "model-00004-of-00004.safetensors",
+    "model.layers.39.self_attn.o_proj.qzeros": "model-00004-of-00004.safetensors",
+    "model.layers.39.self_attn.o_proj.scales": "model-00004-of-00004.safetensors",
+    "model.layers.39.self_attn.q_proj.g_idx": "model-00004-of-00004.safetensors",
+    "model.layers.39.self_attn.q_proj.qweight": "model-00004-of-00004.safetensors",
+    "model.layers.39.self_attn.q_proj.qzeros": "model-00004-of-00004.safetensors",
+    "model.layers.39.self_attn.q_proj.scales": "model-00004-of-00004.safetensors",
+    "model.layers.39.self_attn.v_proj.g_idx": "model-00004-of-00004.safetensors",
+    "model.layers.39.self_attn.v_proj.qweight": "model-00004-of-00004.safetensors",
+    "model.layers.39.self_attn.v_proj.qzeros": "model-00004-of-00004.safetensors",
+    "model.layers.39.self_attn.v_proj.scales": "model-00004-of-00004.safetensors",
+    "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.4.mlp.down_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.4.mlp.down_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.4.mlp.down_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.4.mlp.down_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.4.mlp.gate_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.4.mlp.gate_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.4.mlp.gate_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.4.mlp.gate_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.4.mlp.up_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.4.mlp.up_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.4.mlp.up_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.4.mlp.up_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.4.self_attn.k_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.4.self_attn.k_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.4.self_attn.k_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.4.self_attn.k_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.4.self_attn.o_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.4.self_attn.o_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.4.self_attn.o_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.4.self_attn.o_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.4.self_attn.q_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.4.self_attn.q_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.4.self_attn.q_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.4.self_attn.q_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.4.self_attn.v_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.4.self_attn.v_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.4.self_attn.v_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.4.self_attn.v_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.5.mlp.down_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.5.mlp.down_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.5.mlp.down_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.5.mlp.down_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.5.mlp.gate_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.5.mlp.gate_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.5.mlp.gate_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.5.mlp.gate_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.5.mlp.up_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.5.mlp.up_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.5.mlp.up_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.5.mlp.up_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.5.self_attn.k_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.5.self_attn.k_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.5.self_attn.k_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.5.self_attn.k_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.5.self_attn.o_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.5.self_attn.o_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.5.self_attn.o_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.5.self_attn.o_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.5.self_attn.q_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.5.self_attn.q_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.5.self_attn.q_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.5.self_attn.q_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.5.self_attn.v_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.5.self_attn.v_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.5.self_attn.v_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.5.self_attn.v_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.6.mlp.down_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.6.mlp.down_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.6.mlp.down_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.6.mlp.down_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.6.mlp.gate_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.6.mlp.gate_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.6.mlp.gate_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.6.mlp.gate_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.6.mlp.up_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.6.mlp.up_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.6.mlp.up_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.6.mlp.up_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.6.self_attn.k_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.6.self_attn.k_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.6.self_attn.k_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.6.self_attn.k_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.6.self_attn.o_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.6.self_attn.o_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.6.self_attn.o_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.6.self_attn.o_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.6.self_attn.q_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.6.self_attn.q_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.6.self_attn.q_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.6.self_attn.q_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.6.self_attn.v_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.6.self_attn.v_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.6.self_attn.v_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.6.self_attn.v_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.7.mlp.down_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.7.mlp.down_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.7.mlp.down_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.7.mlp.down_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.7.mlp.gate_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.7.mlp.gate_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.7.mlp.gate_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.7.mlp.gate_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.7.mlp.up_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.7.mlp.up_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.7.mlp.up_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.7.mlp.up_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.7.self_attn.k_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.7.self_attn.k_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.7.self_attn.k_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.7.self_attn.k_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.7.self_attn.o_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.7.self_attn.o_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.7.self_attn.o_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.7.self_attn.o_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.7.self_attn.q_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.7.self_attn.q_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.7.self_attn.q_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.7.self_attn.q_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.7.self_attn.v_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.7.self_attn.v_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.7.self_attn.v_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.7.self_attn.v_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.8.mlp.down_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.8.mlp.down_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.8.mlp.down_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.8.mlp.down_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.8.mlp.gate_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.8.mlp.gate_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.8.mlp.gate_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.8.mlp.gate_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.8.mlp.up_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.8.mlp.up_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.8.mlp.up_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.8.mlp.up_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.8.self_attn.k_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.8.self_attn.k_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.8.self_attn.k_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.8.self_attn.k_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.8.self_attn.o_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.8.self_attn.o_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.8.self_attn.o_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.8.self_attn.o_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.8.self_attn.q_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.8.self_attn.q_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.8.self_attn.q_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.8.self_attn.q_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.8.self_attn.v_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.8.self_attn.v_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.8.self_attn.v_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.8.self_attn.v_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.9.mlp.down_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.9.mlp.down_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.9.mlp.down_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.9.mlp.down_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.9.mlp.gate_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.9.mlp.gate_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.9.mlp.gate_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.9.mlp.gate_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.9.mlp.up_proj.g_idx": "model-00002-of-00004.safetensors",
+    "model.layers.9.mlp.up_proj.qweight": "model-00002-of-00004.safetensors",
+    "model.layers.9.mlp.up_proj.qzeros": "model-00002-of-00004.safetensors",
+    "model.layers.9.mlp.up_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.9.self_attn.k_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.9.self_attn.k_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.9.self_attn.k_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.9.self_attn.k_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.9.self_attn.o_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.9.self_attn.o_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.9.self_attn.o_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.9.self_attn.o_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.9.self_attn.q_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.9.self_attn.q_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.9.self_attn.q_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.9.self_attn.q_proj.scales": "model-00001-of-00004.safetensors",
+    "model.layers.9.self_attn.v_proj.g_idx": "model-00001-of-00004.safetensors",
+    "model.layers.9.self_attn.v_proj.qweight": "model-00001-of-00004.safetensors",
+    "model.layers.9.self_attn.v_proj.qzeros": "model-00001-of-00004.safetensors",
+    "model.layers.9.self_attn.v_proj.scales": "model-00001-of-00004.safetensors",
+    "model.norm.weight": "model-00004-of-00004.safetensors"
+  }
+}

pyproject.toml ADDED Viewed

	@@ -0,0 +1,30 @@

+# pyproject.toml
+[build-system]
+requires = ["uv", "setuptools>=61.0", "wheel"] # uv for uv-aware builds, setuptools for packaging
+build-backend = "setuptools.build_meta"
+[project]
+name = "cquantize"
+version = "0.1.0"
+description = "Quantization script module for confidentialmind-graph project for 4bit GPTQ quantizations (so far)"
+readme = "README.md"
+requires-python = ">=3.11,<=3.13.10" # 3.13.8 is used in the main project
+dependencies = [
+    "python-dotenv>=1.0.1",
+    "gptqmodel>=1.9.0",
+    "threadpoolctl>=3.5.0",
+    "tokenicer>=0.0.2",
+    "device-smi>=0.3.3",
+    "pillow>=11.1.0",
+    "torch>=2.6.0",
+    "accelerate>=1.3.0",
+    "safetensors>=0.5.2",
+    "transformers>=4.48.3",
+    "datasets>=3.3.0",
+    "huggingface-hub>=0.28.1",
+    "typer>=0.15.1",
+]
+[tool.setuptools.package-data]
+quantize = ["README.md", "*.py"] # Include README and Python files if packaged

quant_log.csv ADDED Viewed

	@@ -0,0 +1,281 @@

+layer,module,loss,damp,time
+0,self_attn.k_proj,4.35013,0.01500,5.796
+0,self_attn.v_proj,0.01878,0.01500,3.952
+0,self_attn.q_proj,9.39139,0.01500,4.144
+0,self_attn.o_proj,0.00107,0.01500,4.251
+0,mlp.up_proj,3.44379,0.01500,6.893
+0,mlp.gate_proj,3.71144,0.01500,5.608
+0,mlp.down_proj,0.00267,0.01500,48.588
+1,self_attn.k_proj,29.64296,0.01500,5.442
+1,self_attn.v_proj,0.29023,0.01500,3.862
+1,self_attn.q_proj,32.81706,0.01500,4.049
+1,self_attn.o_proj,0.00473,0.01500,4.184
+1,mlp.up_proj,10.46320,0.01500,6.873
+1,mlp.gate_proj,11.52195,0.01500,5.558
+1,mlp.down_proj,0.00605,0.01500,47.599
+2,self_attn.k_proj,27.76187,0.01500,5.300
+2,self_attn.v_proj,0.46129,0.01500,3.722
+2,self_attn.q_proj,25.52964,0.01500,3.899
+2,self_attn.o_proj,0.01180,0.01500,4.072
+2,mlp.up_proj,23.40275,0.01500,6.705
+2,mlp.gate_proj,25.72694,0.01500,5.453
+2,mlp.down_proj,0.10461,0.01650,48.469
+3,self_attn.k_proj,23.46399,0.01500,5.300
+3,self_attn.v_proj,1.97829,0.01500,3.698
+3,self_attn.q_proj,44.69431,0.01500,3.907
+3,self_attn.o_proj,0.01513,0.01500,4.079
+3,mlp.up_proj,34.65149,0.01500,6.713
+3,mlp.gate_proj,39.73456,0.01500,5.435
+3,mlp.down_proj,0.02708,0.01500,47.462
+4,self_attn.k_proj,18.60740,0.01500,5.257
+4,self_attn.v_proj,2.28142,0.01500,3.690
+4,self_attn.q_proj,37.03451,0.01500,3.843
+4,self_attn.o_proj,0.03601,0.01500,4.081
+4,mlp.up_proj,49.40864,0.01500,6.685
+4,mlp.gate_proj,57.38699,0.01500,5.422
+4,mlp.down_proj,0.05721,0.01500,47.377
+5,self_attn.k_proj,20.30448,0.01500,5.246
+5,self_attn.v_proj,3.00630,0.01500,3.657
+5,self_attn.q_proj,42.47562,0.01500,3.826
+5,self_attn.o_proj,0.06852,0.01500,4.050
+5,mlp.up_proj,70.86872,0.01500,6.692
+5,mlp.gate_proj,82.60092,0.01500,5.394
+5,mlp.down_proj,0.10330,0.01500,47.285
+6,self_attn.k_proj,19.27090,0.01500,5.277
+6,self_attn.v_proj,3.89179,0.01500,3.702
+6,self_attn.q_proj,48.03176,0.01500,3.841
+6,self_attn.o_proj,0.06790,0.01500,4.044
+6,mlp.up_proj,98.14202,0.01500,6.693
+6,mlp.gate_proj,111.40297,0.01500,5.420
+6,mlp.down_proj,0.14446,0.01500,47.310
+7,self_attn.k_proj,25.72945,0.01500,5.264
+7,self_attn.v_proj,6.18809,0.01500,3.666
+7,self_attn.q_proj,61.07327,0.01500,3.844
+7,self_attn.o_proj,0.15881,0.01500,4.046
+7,mlp.up_proj,116.99324,0.01500,6.674
+7,mlp.gate_proj,139.79570,0.01500,5.462
+7,mlp.down_proj,0.24200,0.01500,47.374
+8,self_attn.k_proj,26.63376,0.01500,5.274
+8,self_attn.v_proj,6.73780,0.01500,3.717
+8,self_attn.q_proj,66.64472,0.01500,3.868
+8,self_attn.o_proj,0.16581,0.01500,4.044
+8,mlp.up_proj,149.26277,0.01500,6.674
+8,mlp.gate_proj,182.83129,0.01500,5.446
+8,mlp.down_proj,0.29880,0.01500,47.410
+9,self_attn.k_proj,26.10390,0.01500,5.252
+9,self_attn.v_proj,7.00863,0.01500,3.668
+9,self_attn.q_proj,62.03807,0.01500,3.865
+9,self_attn.o_proj,0.19546,0.01500,4.054
+9,mlp.up_proj,183.63750,0.01500,6.685
+9,mlp.gate_proj,221.14066,0.01500,5.404
+9,mlp.down_proj,0.35934,0.01500,47.430
+10,self_attn.k_proj,36.90206,0.01500,5.287
+10,self_attn.v_proj,9.14226,0.01500,3.698
+10,self_attn.q_proj,80.53106,0.01500,3.871
+10,self_attn.o_proj,0.26012,0.01500,4.053
+10,mlp.up_proj,198.18631,0.01500,6.689
+10,mlp.gate_proj,240.66970,0.01500,5.442
+10,mlp.down_proj,0.46570,0.01500,47.370
+11,self_attn.k_proj,32.14743,0.01500,5.274
+11,self_attn.v_proj,9.67020,0.01500,3.697
+11,self_attn.q_proj,72.00557,0.01500,3.859
+11,self_attn.o_proj,0.32261,0.01500,4.051
+11,mlp.up_proj,210.64434,0.01500,6.680
+11,mlp.gate_proj,254.81907,0.01500,5.416
+11,mlp.down_proj,0.47910,0.01500,47.364
+12,self_attn.k_proj,42.83973,0.01500,5.278
+12,self_attn.v_proj,10.15345,0.01500,3.685
+12,self_attn.q_proj,90.94727,0.01500,3.871
+12,self_attn.o_proj,0.35522,0.01500,4.035
+12,mlp.up_proj,213.25906,0.01500,6.688
+12,mlp.gate_proj,247.21341,0.01500,5.420
+12,mlp.down_proj,0.52284,0.01500,47.267
+13,self_attn.k_proj,32.96127,0.01500,5.274
+13,self_attn.v_proj,12.16650,0.01500,3.692
+13,self_attn.q_proj,75.60257,0.01500,3.862
+13,self_attn.o_proj,0.40324,0.01500,4.056
+13,mlp.up_proj,214.29990,0.01500,6.730
+13,mlp.gate_proj,239.46919,0.01500,5.423
+13,mlp.down_proj,0.57616,0.01500,47.349
+14,self_attn.k_proj,47.18682,0.01500,5.269
+14,self_attn.v_proj,17.65432,0.01500,3.694
+14,self_attn.q_proj,101.04409,0.01500,3.881
+14,self_attn.o_proj,0.68764,0.01500,4.025
+14,mlp.up_proj,234.67443,0.01500,6.685
+14,mlp.gate_proj,253.83465,0.01500,5.412
+14,mlp.down_proj,0.65445,0.01500,47.400
+15,self_attn.k_proj,54.09038,0.01500,5.282
+15,self_attn.v_proj,19.85443,0.01500,3.681
+15,self_attn.q_proj,106.11286,0.01500,3.862
+15,self_attn.o_proj,0.52978,0.01500,4.059
+15,mlp.up_proj,265.88758,0.01500,6.683
+15,mlp.gate_proj,288.06800,0.01500,5.417
+15,mlp.down_proj,0.74769,0.01500,47.369
+16,self_attn.k_proj,37.41516,0.01500,5.275
+16,self_attn.v_proj,20.44218,0.01500,3.738
+16,self_attn.q_proj,91.44013,0.01500,3.859
+16,self_attn.o_proj,0.73796,0.01500,4.080
+16,mlp.up_proj,280.25973,0.01500,6.700
+16,mlp.gate_proj,289.34869,0.01500,5.448
+16,mlp.down_proj,0.97160,0.01500,47.524
+17,self_attn.k_proj,53.08701,0.01500,5.243
+17,self_attn.v_proj,24.74141,0.01500,3.681
+17,self_attn.q_proj,122.04392,0.01500,3.879
+17,self_attn.o_proj,0.91752,0.01500,4.057
+17,mlp.up_proj,304.54559,0.01500,6.694
+17,mlp.gate_proj,325.76280,0.01500,5.421
+17,mlp.down_proj,1.09534,0.01500,47.353
+18,self_attn.k_proj,56.58150,0.01500,5.231
+18,self_attn.v_proj,24.47837,0.01500,3.693
+18,self_attn.q_proj,123.94422,0.01500,3.846
+18,self_attn.o_proj,0.96601,0.01500,4.053
+18,mlp.up_proj,330.56242,0.01500,6.690
+18,mlp.gate_proj,354.70744,0.01500,5.409
+18,mlp.down_proj,1.22894,0.01500,47.584
+19,self_attn.k_proj,52.15675,0.01500,5.281
+19,self_attn.v_proj,20.42821,0.01500,3.689
+19,self_attn.q_proj,111.40133,0.01500,3.865
+19,self_attn.o_proj,0.59738,0.01500,4.067
+19,mlp.up_proj,342.01756,0.01500,6.713
+19,mlp.gate_proj,371.13833,0.01500,5.436
+19,mlp.down_proj,1.24512,0.01500,47.561
+20,self_attn.k_proj,65.34574,0.01500,5.268
+20,self_attn.v_proj,20.79969,0.01500,3.698
+20,self_attn.q_proj,120.78517,0.01500,3.856
+20,self_attn.o_proj,0.79933,0.01500,4.057
+20,mlp.up_proj,379.47065,0.01500,6.673
+20,mlp.gate_proj,416.61195,0.01500,5.464
+20,mlp.down_proj,1.41267,0.01500,47.437
+21,self_attn.k_proj,51.96946,0.01500,5.252
+21,self_attn.v_proj,19.60039,0.01500,3.687
+21,self_attn.q_proj,111.92335,0.01500,3.843
+21,self_attn.o_proj,0.59658,0.01500,4.074
+21,mlp.up_proj,415.81448,0.01500,6.705
+21,mlp.gate_proj,470.86329,0.01500,5.443
+21,mlp.down_proj,1.63403,0.01500,47.409
+22,self_attn.k_proj,54.91379,0.01500,5.293
+22,self_attn.v_proj,22.27869,0.01500,3.704
+22,self_attn.q_proj,120.27491,0.01500,3.854
+22,self_attn.o_proj,0.52668,0.01500,4.036
+22,mlp.up_proj,424.89161,0.01500,6.671
+22,mlp.gate_proj,486.23778,0.01500,5.430
+22,mlp.down_proj,1.71606,0.01500,47.340
+23,self_attn.k_proj,67.44897,0.01500,5.288
+23,self_attn.v_proj,24.07980,0.01500,3.719
+23,self_attn.q_proj,124.59901,0.01500,3.861
+23,self_attn.o_proj,0.68255,0.01500,4.066
+23,mlp.up_proj,434.17176,0.01500,6.694
+23,mlp.gate_proj,496.52634,0.01500,5.452
+23,mlp.down_proj,1.82865,0.01500,47.298
+24,self_attn.k_proj,47.10809,0.01500,5.292
+24,self_attn.v_proj,22.28693,0.01500,3.699
+24,self_attn.q_proj,116.92687,0.01500,3.876
+24,self_attn.o_proj,0.52270,0.01500,4.068
+24,mlp.up_proj,435.76123,0.01500,6.705
+24,mlp.gate_proj,486.76570,0.01500,5.432
+24,mlp.down_proj,1.88327,0.01500,47.333
+25,self_attn.k_proj,62.62039,0.01500,5.271
+25,self_attn.v_proj,24.71245,0.01500,3.739
+25,self_attn.q_proj,123.76468,0.01500,3.894
+25,self_attn.o_proj,0.83373,0.01500,4.075
+25,mlp.up_proj,458.06028,0.01500,6.708
+25,mlp.gate_proj,527.59241,0.01500,5.438
+25,mlp.down_proj,2.04980,0.01500,47.372
+26,self_attn.k_proj,50.97967,0.01500,5.285
+26,self_attn.v_proj,24.99530,0.01500,3.689
+26,self_attn.q_proj,122.16379,0.01500,3.884
+26,self_attn.o_proj,0.50815,0.01500,4.071
+26,mlp.up_proj,474.21384,0.01500,6.708
+26,mlp.gate_proj,560.86479,0.01500,5.421
+26,mlp.down_proj,2.12071,0.01500,47.407
+27,self_attn.k_proj,32.21025,0.01500,5.284
+27,self_attn.v_proj,22.32059,0.01500,3.672
+27,self_attn.q_proj,93.89602,0.01500,3.866
+27,self_attn.o_proj,0.84583,0.01500,4.067
+27,mlp.up_proj,471.07579,0.01500,6.717
+27,mlp.gate_proj,548.74328,0.01500,5.469
+27,mlp.down_proj,2.41532,0.01500,47.444
+28,self_attn.k_proj,60.67304,0.01500,5.264
+28,self_attn.v_proj,30.47938,0.01500,3.699
+28,self_attn.q_proj,134.55070,0.01500,3.872
+28,self_attn.o_proj,0.74397,0.01500,4.059
+28,mlp.up_proj,490.40091,0.01500,6.696
+28,mlp.gate_proj,595.66978,0.01500,5.469
+28,mlp.down_proj,1.89024,0.01500,47.428
+29,self_attn.k_proj,50.39839,0.01500,5.276
+29,self_attn.v_proj,22.60897,0.01500,3.714
+29,self_attn.q_proj,104.39243,0.01500,3.870
+29,self_attn.o_proj,0.88428,0.01500,4.077
+29,mlp.up_proj,508.61017,0.01500,6.704
+29,mlp.gate_proj,620.95550,0.01500,5.440
+29,mlp.down_proj,2.13120,0.01500,47.371
+30,self_attn.k_proj,68.45669,0.01500,5.327
+30,self_attn.v_proj,34.79505,0.01500,3.705
+30,self_attn.q_proj,140.91889,0.01500,3.889
+30,self_attn.o_proj,0.99748,0.01500,4.053
+30,mlp.up_proj,540.70949,0.01500,6.699
+30,mlp.gate_proj,659.83809,0.01500,5.431
+30,mlp.down_proj,2.10708,0.01500,47.384
+31,self_attn.k_proj,44.78834,0.01500,5.256
+31,self_attn.v_proj,31.78718,0.01500,3.696
+31,self_attn.q_proj,114.04412,0.01500,3.877
+31,self_attn.o_proj,0.59152,0.01500,4.065
+31,mlp.up_proj,560.45728,0.01500,6.695
+31,mlp.gate_proj,674.60949,0.01500,5.428
+31,mlp.down_proj,2.30641,0.01500,47.367
+32,self_attn.k_proj,56.30912,0.01500,5.275
+32,self_attn.v_proj,34.25995,0.01500,3.708
+32,self_attn.q_proj,121.05670,0.01500,3.869
+32,self_attn.o_proj,0.82758,0.01500,4.075
+32,mlp.up_proj,591.63220,0.01500,6.689
+32,mlp.gate_proj,706.15997,0.01500,5.451
+32,mlp.down_proj,2.53290,0.01500,47.267
+33,self_attn.k_proj,46.11280,0.01500,5.284
+33,self_attn.v_proj,42.51080,0.01500,3.726
+33,self_attn.q_proj,120.57386,0.01500,3.907
+33,self_attn.o_proj,0.89769,0.01500,4.080
+33,mlp.up_proj,627.82678,0.01500,6.741
+33,mlp.gate_proj,722.65380,0.01500,5.448
+33,mlp.down_proj,3.02083,0.01500,47.569
+34,self_attn.k_proj,57.65119,0.01500,5.307
+34,self_attn.v_proj,47.52145,0.01500,3.731
+34,self_attn.q_proj,126.24557,0.01500,3.907
+34,self_attn.o_proj,2.01183,0.01500,4.098
+34,mlp.up_proj,700.00593,0.01500,6.708
+34,mlp.gate_proj,757.95016,0.01500,5.469
+34,mlp.down_proj,4.47699,0.01500,47.457
+35,self_attn.k_proj,58.37378,0.01500,5.287
+35,self_attn.v_proj,52.73662,0.01500,3.720
+35,self_attn.q_proj,126.43205,0.01500,3.886
+35,self_attn.o_proj,1.93946,0.01500,4.104
+35,mlp.up_proj,750.29913,0.01500,6.719
+35,mlp.gate_proj,780.23101,0.01500,5.440
+35,mlp.down_proj,6.17615,0.01500,47.425
+36,self_attn.k_proj,51.75059,0.01500,5.291
+36,self_attn.v_proj,60.43876,0.01500,3.710
+36,self_attn.q_proj,129.25831,0.01500,3.886
+36,self_attn.o_proj,4.24202,0.01500,4.068
+36,mlp.up_proj,881.16835,0.01500,6.726
+36,mlp.gate_proj,894.70775,0.01500,5.467
+36,mlp.down_proj,9.76076,0.01500,47.466
+37,self_attn.k_proj,61.10957,0.01500,5.386
+37,self_attn.v_proj,92.40344,0.01500,3.698
+37,self_attn.q_proj,152.37925,0.01500,3.911
+37,self_attn.o_proj,8.34289,0.01500,4.077
+37,mlp.up_proj,1013.91551,0.01500,6.703
+37,mlp.gate_proj,1019.51068,0.01500,5.443
+37,mlp.down_proj,14.96210,0.01500,47.407
+38,self_attn.k_proj,57.50252,0.01500,5.300
+38,self_attn.v_proj,74.01140,0.01500,3.718
+38,self_attn.q_proj,142.36679,0.01500,3.895
+38,self_attn.o_proj,11.92842,0.01500,4.074
+38,mlp.up_proj,950.04810,0.01500,6.712
+38,mlp.gate_proj,934.03283,0.01500,5.426
+38,mlp.down_proj,25.90887,0.01500,47.661
+39,self_attn.k_proj,56.80673,0.01500,5.343
+39,self_attn.v_proj,41.75524,0.01500,3.736
+39,self_attn.q_proj,137.96739,0.01500,3.917
+39,self_attn.o_proj,14.77933,0.01500,4.105
+39,mlp.up_proj,794.27943,0.01500,6.733
+39,mlp.gate_proj,831.57729,0.01500,5.472
+39,mlp.down_proj,44.98471,0.01500,47.570

quantize_config.json ADDED Viewed

	@@ -0,0 +1,21 @@

+{
+  "bits": 4,
+  "group_size": 128,
+  "desc_act": true,
+  "sym": true,
+  "lm_head": false,
+  "quant_method": "gptq",
+  "checkpoint_format": "gptq",
+  "pack_dtype": "int32",
+  "meta": {
+    "quantizer": [
+      "gptqmodel:1.9.0"
+    ],
+    "uri": "https://github.com/modelcloud/gptqmodel",
+    "damp_percent": 0.015,
+    "damp_auto_increment": 0.0025,
+    "static_groups": false,
+    "true_sequential": true,
+    "mse": 0.01
+  }
+}

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,1025 @@

+{
+  "additional_special_tokens": [
+    "<unk>",
+    "<s>",
+    "</s>",
+    "[INST]",
+    "[/INST]",
+    "[AVAILABLE_TOOLS]",
+    "[/AVAILABLE_TOOLS]",
+    "[TOOL_RESULTS]",
+    "[/TOOL_RESULTS]",
+    "[TOOL_CALLS]",
+    "[IMG]",
+    "<pad>",
+    "[IMG_BREAK]",
+    "[IMG_END]",
+    "[PREFIX]",
+    "[MIDDLE]",
+    "[SUFFIX]",
+    "[SYSTEM_PROMPT]",
+    "[/SYSTEM_PROMPT]",
+    "[TOOL_CONTENT]",
+    "<SPECIAL_20>",
+    "<SPECIAL_21>",
+    "<SPECIAL_22>",
+    "<SPECIAL_23>",
+    "<SPECIAL_24>",
+    "<SPECIAL_25>",
+    "<SPECIAL_26>",
+    "<SPECIAL_27>",
+    "<SPECIAL_28>",
+    "<SPECIAL_29>",
+    "<SPECIAL_30>",
+    "<SPECIAL_31>",
+    "<SPECIAL_32>",
+    "<SPECIAL_33>",
+    "<SPECIAL_34>",
+    "<SPECIAL_35>",
+    "<SPECIAL_36>",
+    "<SPECIAL_37>",
+    "<SPECIAL_38>",
+    "<SPECIAL_39>",
+    "<SPECIAL_40>",
+    "<SPECIAL_41>",
+    "<SPECIAL_42>",
+    "<SPECIAL_43>",
+    "<SPECIAL_44>",
+    "<SPECIAL_45>",
+    "<SPECIAL_46>",
+    "<SPECIAL_47>",
+    "<SPECIAL_48>",
+    "<SPECIAL_49>",
+    "<SPECIAL_50>",
+    "<SPECIAL_51>",
+    "<SPECIAL_52>",
+    "<SPECIAL_53>",
+    "<SPECIAL_54>",
+    "<SPECIAL_55>",
+    "<SPECIAL_56>",
+    "<SPECIAL_57>",
+    "<SPECIAL_58>",
+    "<SPECIAL_59>",
+    "<SPECIAL_60>",
+    "<SPECIAL_61>",
+    "<SPECIAL_62>",
+    "<SPECIAL_63>",
+    "<SPECIAL_64>",
+    "<SPECIAL_65>",
+    "<SPECIAL_66>",
+    "<SPECIAL_67>",
+    "<SPECIAL_68>",
+    "<SPECIAL_69>",
+    "<SPECIAL_70>",
+    "<SPECIAL_71>",
+    "<SPECIAL_72>",
+    "<SPECIAL_73>",
+    "<SPECIAL_74>",
+    "<SPECIAL_75>",
+    "<SPECIAL_76>",
+    "<SPECIAL_77>",
+    "<SPECIAL_78>",
+    "<SPECIAL_79>",
+    "<SPECIAL_80>",
+    "<SPECIAL_81>",
+    "<SPECIAL_82>",
+    "<SPECIAL_83>",
+    "<SPECIAL_84>",
+    "<SPECIAL_85>",
+    "<SPECIAL_86>",
+    "<SPECIAL_87>",
+    "<SPECIAL_88>",
+    "<SPECIAL_89>",
+    "<SPECIAL_90>",
+    "<SPECIAL_91>",
+    "<SPECIAL_92>",
+    "<SPECIAL_93>",
+    "<SPECIAL_94>",
+    "<SPECIAL_95>",
+    "<SPECIAL_96>",
+    "<SPECIAL_97>",
+    "<SPECIAL_98>",
+    "<SPECIAL_99>",
+    "<SPECIAL_100>",
+    "<SPECIAL_101>",
+    "<SPECIAL_102>",
+    "<SPECIAL_103>",
+    "<SPECIAL_104>",
+    "<SPECIAL_105>",
+    "<SPECIAL_106>",
+    "<SPECIAL_107>",
+    "<SPECIAL_108>",
+    "<SPECIAL_109>",
+    "<SPECIAL_110>",
+    "<SPECIAL_111>",
+    "<SPECIAL_112>",
+    "<SPECIAL_113>",
+    "<SPECIAL_114>",
+    "<SPECIAL_115>",
+    "<SPECIAL_116>",
+    "<SPECIAL_117>",
+    "<SPECIAL_118>",
+    "<SPECIAL_119>",
+    "<SPECIAL_120>",
+    "<SPECIAL_121>",
+    "<SPECIAL_122>",
+    "<SPECIAL_123>",
+    "<SPECIAL_124>",
+    "<SPECIAL_125>",
+    "<SPECIAL_126>",
+    "<SPECIAL_127>",
+    "<SPECIAL_128>",
+    "<SPECIAL_129>",
+    "<SPECIAL_130>",
+    "<SPECIAL_131>",
+    "<SPECIAL_132>",
+    "<SPECIAL_133>",
+    "<SPECIAL_134>",
+    "<SPECIAL_135>",
+    "<SPECIAL_136>",
+    "<SPECIAL_137>",
+    "<SPECIAL_138>",
+    "<SPECIAL_139>",
+    "<SPECIAL_140>",
+    "<SPECIAL_141>",
+    "<SPECIAL_142>",
+    "<SPECIAL_143>",
+    "<SPECIAL_144>",
+    "<SPECIAL_145>",
+    "<SPECIAL_146>",
+    "<SPECIAL_147>",
+    "<SPECIAL_148>",
+    "<SPECIAL_149>",
+    "<SPECIAL_150>",
+    "<SPECIAL_151>",
+    "<SPECIAL_152>",
+    "<SPECIAL_153>",
+    "<SPECIAL_154>",
+    "<SPECIAL_155>",
+    "<SPECIAL_156>",
+    "<SPECIAL_157>",
+    "<SPECIAL_158>",
+    "<SPECIAL_159>",
+    "<SPECIAL_160>",
+    "<SPECIAL_161>",
+    "<SPECIAL_162>",
+    "<SPECIAL_163>",
+    "<SPECIAL_164>",
+    "<SPECIAL_165>",
+    "<SPECIAL_166>",
+    "<SPECIAL_167>",
+    "<SPECIAL_168>",
+    "<SPECIAL_169>",
+    "<SPECIAL_170>",
+    "<SPECIAL_171>",
+    "<SPECIAL_172>",
+    "<SPECIAL_173>",
+    "<SPECIAL_174>",
+    "<SPECIAL_175>",
+    "<SPECIAL_176>",
+    "<SPECIAL_177>",
+    "<SPECIAL_178>",
+    "<SPECIAL_179>",
+    "<SPECIAL_180>",
+    "<SPECIAL_181>",
+    "<SPECIAL_182>",
+    "<SPECIAL_183>",
+    "<SPECIAL_184>",
+    "<SPECIAL_185>",
+    "<SPECIAL_186>",
+    "<SPECIAL_187>",
+    "<SPECIAL_188>",
+    "<SPECIAL_189>",
+    "<SPECIAL_190>",
+    "<SPECIAL_191>",
+    "<SPECIAL_192>",
+    "<SPECIAL_193>",
+    "<SPECIAL_194>",
+    "<SPECIAL_195>",
+    "<SPECIAL_196>",
+    "<SPECIAL_197>",
+    "<SPECIAL_198>",
+    "<SPECIAL_199>",
+    "<SPECIAL_200>",
+    "<SPECIAL_201>",
+    "<SPECIAL_202>",
+    "<SPECIAL_203>",
+    "<SPECIAL_204>",
+    "<SPECIAL_205>",
+    "<SPECIAL_206>",
+    "<SPECIAL_207>",
+    "<SPECIAL_208>",
+    "<SPECIAL_209>",
+    "<SPECIAL_210>",
+    "<SPECIAL_211>",
+    "<SPECIAL_212>",
+    "<SPECIAL_213>",
+    "<SPECIAL_214>",
+    "<SPECIAL_215>",
+    "<SPECIAL_216>",
+    "<SPECIAL_217>",
+    "<SPECIAL_218>",
+    "<SPECIAL_219>",
+    "<SPECIAL_220>",
+    "<SPECIAL_221>",
+    "<SPECIAL_222>",
+    "<SPECIAL_223>",
+    "<SPECIAL_224>",
+    "<SPECIAL_225>",
+    "<SPECIAL_226>",
+    "<SPECIAL_227>",
+    "<SPECIAL_228>",
+    "<SPECIAL_229>",
+    "<SPECIAL_230>",
+    "<SPECIAL_231>",
+    "<SPECIAL_232>",
+    "<SPECIAL_233>",
+    "<SPECIAL_234>",
+    "<SPECIAL_235>",
+    "<SPECIAL_236>",
+    "<SPECIAL_237>",
+    "<SPECIAL_238>",
+    "<SPECIAL_239>",
+    "<SPECIAL_240>",
+    "<SPECIAL_241>",
+    "<SPECIAL_242>",
+    "<SPECIAL_243>",
+    "<SPECIAL_244>",
+    "<SPECIAL_245>",
+    "<SPECIAL_246>",
+    "<SPECIAL_247>",
+    "<SPECIAL_248>",
+    "<SPECIAL_249>",
+    "<SPECIAL_250>",
+    "<SPECIAL_251>",
+    "<SPECIAL_252>",
+    "<SPECIAL_253>",
+    "<SPECIAL_254>",
+    "<SPECIAL_255>",
+    "<SPECIAL_256>",
+    "<SPECIAL_257>",
+    "<SPECIAL_258>",
+    "<SPECIAL_259>",
+    "<SPECIAL_260>",
+    "<SPECIAL_261>",
+    "<SPECIAL_262>",
+    "<SPECIAL_263>",
+    "<SPECIAL_264>",
+    "<SPECIAL_265>",
+    "<SPECIAL_266>",
+    "<SPECIAL_267>",
+    "<SPECIAL_268>",
+    "<SPECIAL_269>",
+    "<SPECIAL_270>",
+    "<SPECIAL_271>",
+    "<SPECIAL_272>",
+    "<SPECIAL_273>",
+    "<SPECIAL_274>",
+    "<SPECIAL_275>",
+    "<SPECIAL_276>",
+    "<SPECIAL_277>",
+    "<SPECIAL_278>",
+    "<SPECIAL_279>",
+    "<SPECIAL_280>",
+    "<SPECIAL_281>",
+    "<SPECIAL_282>",
+    "<SPECIAL_283>",
+    "<SPECIAL_284>",
+    "<SPECIAL_285>",
+    "<SPECIAL_286>",
+    "<SPECIAL_287>",
+    "<SPECIAL_288>",
+    "<SPECIAL_289>",
+    "<SPECIAL_290>",
+    "<SPECIAL_291>",
+    "<SPECIAL_292>",
+    "<SPECIAL_293>",
+    "<SPECIAL_294>",
+    "<SPECIAL_295>",
+    "<SPECIAL_296>",
+    "<SPECIAL_297>",
+    "<SPECIAL_298>",
+    "<SPECIAL_299>",
+    "<SPECIAL_300>",
+    "<SPECIAL_301>",
+    "<SPECIAL_302>",
+    "<SPECIAL_303>",
+    "<SPECIAL_304>",
+    "<SPECIAL_305>",
+    "<SPECIAL_306>",
+    "<SPECIAL_307>",
+    "<SPECIAL_308>",
+    "<SPECIAL_309>",
+    "<SPECIAL_310>",
+    "<SPECIAL_311>",
+    "<SPECIAL_312>",
+    "<SPECIAL_313>",
+    "<SPECIAL_314>",
+    "<SPECIAL_315>",
+    "<SPECIAL_316>",
+    "<SPECIAL_317>",
+    "<SPECIAL_318>",
+    "<SPECIAL_319>",
+    "<SPECIAL_320>",
+    "<SPECIAL_321>",
+    "<SPECIAL_322>",
+    "<SPECIAL_323>",
+    "<SPECIAL_324>",
+    "<SPECIAL_325>",
+    "<SPECIAL_326>",
+    "<SPECIAL_327>",
+    "<SPECIAL_328>",
+    "<SPECIAL_329>",
+    "<SPECIAL_330>",
+    "<SPECIAL_331>",
+    "<SPECIAL_332>",
+    "<SPECIAL_333>",
+    "<SPECIAL_334>",
+    "<SPECIAL_335>",
+    "<SPECIAL_336>",
+    "<SPECIAL_337>",
+    "<SPECIAL_338>",
+    "<SPECIAL_339>",
+    "<SPECIAL_340>",
+    "<SPECIAL_341>",
+    "<SPECIAL_342>",
+    "<SPECIAL_343>",
+    "<SPECIAL_344>",
+    "<SPECIAL_345>",
+    "<SPECIAL_346>",
+    "<SPECIAL_347>",
+    "<SPECIAL_348>",
+    "<SPECIAL_349>",
+    "<SPECIAL_350>",
+    "<SPECIAL_351>",
+    "<SPECIAL_352>",
+    "<SPECIAL_353>",
+    "<SPECIAL_354>",
+    "<SPECIAL_355>",
+    "<SPECIAL_356>",
+    "<SPECIAL_357>",
+    "<SPECIAL_358>",
+    "<SPECIAL_359>",
+    "<SPECIAL_360>",
+    "<SPECIAL_361>",
+    "<SPECIAL_362>",
+    "<SPECIAL_363>",
+    "<SPECIAL_364>",
+    "<SPECIAL_365>",
+    "<SPECIAL_366>",
+    "<SPECIAL_367>",
+    "<SPECIAL_368>",
+    "<SPECIAL_369>",
+    "<SPECIAL_370>",
+    "<SPECIAL_371>",
+    "<SPECIAL_372>",
+    "<SPECIAL_373>",
+    "<SPECIAL_374>",
+    "<SPECIAL_375>",
+    "<SPECIAL_376>",
+    "<SPECIAL_377>",
+    "<SPECIAL_378>",
+    "<SPECIAL_379>",
+    "<SPECIAL_380>",
+    "<SPECIAL_381>",
+    "<SPECIAL_382>",
+    "<SPECIAL_383>",
+    "<SPECIAL_384>",
+    "<SPECIAL_385>",
+    "<SPECIAL_386>",
+    "<SPECIAL_387>",
+    "<SPECIAL_388>",
+    "<SPECIAL_389>",
+    "<SPECIAL_390>",
+    "<SPECIAL_391>",
+    "<SPECIAL_392>",
+    "<SPECIAL_393>",
+    "<SPECIAL_394>",
+    "<SPECIAL_395>",
+    "<SPECIAL_396>",
+    "<SPECIAL_397>",
+    "<SPECIAL_398>",
+    "<SPECIAL_399>",
+    "<SPECIAL_400>",
+    "<SPECIAL_401>",
+    "<SPECIAL_402>",
+    "<SPECIAL_403>",
+    "<SPECIAL_404>",
+    "<SPECIAL_405>",
+    "<SPECIAL_406>",
+    "<SPECIAL_407>",
+    "<SPECIAL_408>",
+    "<SPECIAL_409>",
+    "<SPECIAL_410>",
+    "<SPECIAL_411>",
+    "<SPECIAL_412>",
+    "<SPECIAL_413>",
+    "<SPECIAL_414>",
+    "<SPECIAL_415>",
+    "<SPECIAL_416>",
+    "<SPECIAL_417>",
+    "<SPECIAL_418>",
+    "<SPECIAL_419>",
+    "<SPECIAL_420>",
+    "<SPECIAL_421>",
+    "<SPECIAL_422>",
+    "<SPECIAL_423>",
+    "<SPECIAL_424>",
+    "<SPECIAL_425>",
+    "<SPECIAL_426>",
+    "<SPECIAL_427>",
+    "<SPECIAL_428>",
+    "<SPECIAL_429>",
+    "<SPECIAL_430>",
+    "<SPECIAL_431>",
+    "<SPECIAL_432>",
+    "<SPECIAL_433>",
+    "<SPECIAL_434>",
+    "<SPECIAL_435>",
+    "<SPECIAL_436>",
+    "<SPECIAL_437>",
+    "<SPECIAL_438>",
+    "<SPECIAL_439>",
+    "<SPECIAL_440>",
+    "<SPECIAL_441>",
+    "<SPECIAL_442>",
+    "<SPECIAL_443>",
+    "<SPECIAL_444>",
+    "<SPECIAL_445>",
+    "<SPECIAL_446>",
+    "<SPECIAL_447>",
+    "<SPECIAL_448>",
+    "<SPECIAL_449>",
+    "<SPECIAL_450>",
+    "<SPECIAL_451>",
+    "<SPECIAL_452>",
+    "<SPECIAL_453>",
+    "<SPECIAL_454>",
+    "<SPECIAL_455>",
+    "<SPECIAL_456>",
+    "<SPECIAL_457>",
+    "<SPECIAL_458>",
+    "<SPECIAL_459>",
+    "<SPECIAL_460>",
+    "<SPECIAL_461>",
+    "<SPECIAL_462>",
+    "<SPECIAL_463>",
+    "<SPECIAL_464>",
+    "<SPECIAL_465>",
+    "<SPECIAL_466>",
+    "<SPECIAL_467>",
+    "<SPECIAL_468>",
+    "<SPECIAL_469>",
+    "<SPECIAL_470>",
+    "<SPECIAL_471>",
+    "<SPECIAL_472>",
+    "<SPECIAL_473>",
+    "<SPECIAL_474>",
+    "<SPECIAL_475>",
+    "<SPECIAL_476>",
+    "<SPECIAL_477>",
+    "<SPECIAL_478>",
+    "<SPECIAL_479>",
+    "<SPECIAL_480>",
+    "<SPECIAL_481>",
+    "<SPECIAL_482>",
+    "<SPECIAL_483>",
+    "<SPECIAL_484>",
+    "<SPECIAL_485>",
+    "<SPECIAL_486>",
+    "<SPECIAL_487>",
+    "<SPECIAL_488>",
+    "<SPECIAL_489>",
+    "<SPECIAL_490>",
+    "<SPECIAL_491>",
+    "<SPECIAL_492>",
+    "<SPECIAL_493>",
+    "<SPECIAL_494>",
+    "<SPECIAL_495>",
+    "<SPECIAL_496>",
+    "<SPECIAL_497>",
+    "<SPECIAL_498>",
+    "<SPECIAL_499>",
+    "<SPECIAL_500>",
+    "<SPECIAL_501>",
+    "<SPECIAL_502>",
+    "<SPECIAL_503>",
+    "<SPECIAL_504>",
+    "<SPECIAL_505>",
+    "<SPECIAL_506>",
+    "<SPECIAL_507>",
+    "<SPECIAL_508>",
+    "<SPECIAL_509>",
+    "<SPECIAL_510>",
+    "<SPECIAL_511>",
+    "<SPECIAL_512>",
+    "<SPECIAL_513>",
+    "<SPECIAL_514>",
+    "<SPECIAL_515>",
+    "<SPECIAL_516>",
+    "<SPECIAL_517>",
+    "<SPECIAL_518>",
+    "<SPECIAL_519>",
+    "<SPECIAL_520>",
+    "<SPECIAL_521>",
+    "<SPECIAL_522>",
+    "<SPECIAL_523>",
+    "<SPECIAL_524>",
+    "<SPECIAL_525>",
+    "<SPECIAL_526>",
+    "<SPECIAL_527>",
+    "<SPECIAL_528>",
+    "<SPECIAL_529>",
+    "<SPECIAL_530>",
+    "<SPECIAL_531>",
+    "<SPECIAL_532>",
+    "<SPECIAL_533>",
+    "<SPECIAL_534>",
+    "<SPECIAL_535>",
+    "<SPECIAL_536>",
+    "<SPECIAL_537>",
+    "<SPECIAL_538>",
+    "<SPECIAL_539>",
+    "<SPECIAL_540>",
+    "<SPECIAL_541>",
+    "<SPECIAL_542>",
+    "<SPECIAL_543>",
+    "<SPECIAL_544>",
+    "<SPECIAL_545>",
+    "<SPECIAL_546>",
+    "<SPECIAL_547>",
+    "<SPECIAL_548>",
+    "<SPECIAL_549>",
+    "<SPECIAL_550>",
+    "<SPECIAL_551>",
+    "<SPECIAL_552>",
+    "<SPECIAL_553>",
+    "<SPECIAL_554>",
+    "<SPECIAL_555>",
+    "<SPECIAL_556>",
+    "<SPECIAL_557>",
+    "<SPECIAL_558>",
+    "<SPECIAL_559>",
+    "<SPECIAL_560>",
+    "<SPECIAL_561>",
+    "<SPECIAL_562>",
+    "<SPECIAL_563>",
+    "<SPECIAL_564>",
+    "<SPECIAL_565>",
+    "<SPECIAL_566>",
+    "<SPECIAL_567>",
+    "<SPECIAL_568>",
+    "<SPECIAL_569>",
+    "<SPECIAL_570>",
+    "<SPECIAL_571>",
+    "<SPECIAL_572>",
+    "<SPECIAL_573>",
+    "<SPECIAL_574>",
+    "<SPECIAL_575>",
+    "<SPECIAL_576>",
+    "<SPECIAL_577>",
+    "<SPECIAL_578>",
+    "<SPECIAL_579>",
+    "<SPECIAL_580>",
+    "<SPECIAL_581>",
+    "<SPECIAL_582>",
+    "<SPECIAL_583>",
+    "<SPECIAL_584>",
+    "<SPECIAL_585>",
+    "<SPECIAL_586>",
+    "<SPECIAL_587>",
+    "<SPECIAL_588>",
+    "<SPECIAL_589>",
+    "<SPECIAL_590>",
+    "<SPECIAL_591>",
+    "<SPECIAL_592>",
+    "<SPECIAL_593>",
+    "<SPECIAL_594>",
+    "<SPECIAL_595>",
+    "<SPECIAL_596>",
+    "<SPECIAL_597>",
+    "<SPECIAL_598>",
+    "<SPECIAL_599>",
+    "<SPECIAL_600>",
+    "<SPECIAL_601>",
+    "<SPECIAL_602>",
+    "<SPECIAL_603>",
+    "<SPECIAL_604>",
+    "<SPECIAL_605>",
+    "<SPECIAL_606>",
+    "<SPECIAL_607>",
+    "<SPECIAL_608>",
+    "<SPECIAL_609>",
+    "<SPECIAL_610>",
+    "<SPECIAL_611>",
+    "<SPECIAL_612>",
+    "<SPECIAL_613>",
+    "<SPECIAL_614>",
+    "<SPECIAL_615>",
+    "<SPECIAL_616>",
+    "<SPECIAL_617>",
+    "<SPECIAL_618>",
+    "<SPECIAL_619>",
+    "<SPECIAL_620>",
+    "<SPECIAL_621>",
+    "<SPECIAL_622>",
+    "<SPECIAL_623>",
+    "<SPECIAL_624>",
+    "<SPECIAL_625>",
+    "<SPECIAL_626>",
+    "<SPECIAL_627>",
+    "<SPECIAL_628>",
+    "<SPECIAL_629>",
+    "<SPECIAL_630>",
+    "<SPECIAL_631>",
+    "<SPECIAL_632>",
+    "<SPECIAL_633>",
+    "<SPECIAL_634>",
+    "<SPECIAL_635>",
+    "<SPECIAL_636>",
+    "<SPECIAL_637>",
+    "<SPECIAL_638>",
+    "<SPECIAL_639>",
+    "<SPECIAL_640>",
+    "<SPECIAL_641>",
+    "<SPECIAL_642>",
+    "<SPECIAL_643>",
+    "<SPECIAL_644>",
+    "<SPECIAL_645>",
+    "<SPECIAL_646>",
+    "<SPECIAL_647>",
+    "<SPECIAL_648>",
+    "<SPECIAL_649>",
+    "<SPECIAL_650>",
+    "<SPECIAL_651>",
+    "<SPECIAL_652>",
+    "<SPECIAL_653>",
+    "<SPECIAL_654>",
+    "<SPECIAL_655>",
+    "<SPECIAL_656>",
+    "<SPECIAL_657>",
+    "<SPECIAL_658>",
+    "<SPECIAL_659>",
+    "<SPECIAL_660>",
+    "<SPECIAL_661>",
+    "<SPECIAL_662>",
+    "<SPECIAL_663>",
+    "<SPECIAL_664>",
+    "<SPECIAL_665>",
+    "<SPECIAL_666>",
+    "<SPECIAL_667>",
+    "<SPECIAL_668>",
+    "<SPECIAL_669>",
+    "<SPECIAL_670>",
+    "<SPECIAL_671>",
+    "<SPECIAL_672>",
+    "<SPECIAL_673>",
+    "<SPECIAL_674>",
+    "<SPECIAL_675>",
+    "<SPECIAL_676>",
+    "<SPECIAL_677>",
+    "<SPECIAL_678>",
+    "<SPECIAL_679>",
+    "<SPECIAL_680>",
+    "<SPECIAL_681>",
+    "<SPECIAL_682>",
+    "<SPECIAL_683>",
+    "<SPECIAL_684>",
+    "<SPECIAL_685>",
+    "<SPECIAL_686>",
+    "<SPECIAL_687>",
+    "<SPECIAL_688>",
+    "<SPECIAL_689>",
+    "<SPECIAL_690>",
+    "<SPECIAL_691>",
+    "<SPECIAL_692>",
+    "<SPECIAL_693>",
+    "<SPECIAL_694>",
+    "<SPECIAL_695>",
+    "<SPECIAL_696>",
+    "<SPECIAL_697>",
+    "<SPECIAL_698>",
+    "<SPECIAL_699>",
+    "<SPECIAL_700>",
+    "<SPECIAL_701>",
+    "<SPECIAL_702>",
+    "<SPECIAL_703>",
+    "<SPECIAL_704>",
+    "<SPECIAL_705>",
+    "<SPECIAL_706>",
+    "<SPECIAL_707>",
+    "<SPECIAL_708>",
+    "<SPECIAL_709>",
+    "<SPECIAL_710>",
+    "<SPECIAL_711>",
+    "<SPECIAL_712>",
+    "<SPECIAL_713>",
+    "<SPECIAL_714>",
+    "<SPECIAL_715>",
+    "<SPECIAL_716>",
+    "<SPECIAL_717>",
+    "<SPECIAL_718>",
+    "<SPECIAL_719>",
+    "<SPECIAL_720>",
+    "<SPECIAL_721>",
+    "<SPECIAL_722>",
+    "<SPECIAL_723>",
+    "<SPECIAL_724>",
+    "<SPECIAL_725>",
+    "<SPECIAL_726>",
+    "<SPECIAL_727>",
+    "<SPECIAL_728>",
+    "<SPECIAL_729>",
+    "<SPECIAL_730>",
+    "<SPECIAL_731>",
+    "<SPECIAL_732>",
+    "<SPECIAL_733>",
+    "<SPECIAL_734>",
+    "<SPECIAL_735>",
+    "<SPECIAL_736>",
+    "<SPECIAL_737>",
+    "<SPECIAL_738>",
+    "<SPECIAL_739>",
+    "<SPECIAL_740>",
+    "<SPECIAL_741>",
+    "<SPECIAL_742>",
+    "<SPECIAL_743>",
+    "<SPECIAL_744>",
+    "<SPECIAL_745>",
+    "<SPECIAL_746>",
+    "<SPECIAL_747>",
+    "<SPECIAL_748>",
+    "<SPECIAL_749>",
+    "<SPECIAL_750>",
+    "<SPECIAL_751>",
+    "<SPECIAL_752>",
+    "<SPECIAL_753>",
+    "<SPECIAL_754>",
+    "<SPECIAL_755>",
+    "<SPECIAL_756>",
+    "<SPECIAL_757>",
+    "<SPECIAL_758>",
+    "<SPECIAL_759>",
+    "<SPECIAL_760>",
+    "<SPECIAL_761>",
+    "<SPECIAL_762>",
+    "<SPECIAL_763>",
+    "<SPECIAL_764>",
+    "<SPECIAL_765>",
+    "<SPECIAL_766>",
+    "<SPECIAL_767>",
+    "<SPECIAL_768>",
+    "<SPECIAL_769>",
+    "<SPECIAL_770>",
+    "<SPECIAL_771>",
+    "<SPECIAL_772>",
+    "<SPECIAL_773>",
+    "<SPECIAL_774>",
+    "<SPECIAL_775>",
+    "<SPECIAL_776>",
+    "<SPECIAL_777>",
+    "<SPECIAL_778>",
+    "<SPECIAL_779>",
+    "<SPECIAL_780>",
+    "<SPECIAL_781>",
+    "<SPECIAL_782>",
+    "<SPECIAL_783>",
+    "<SPECIAL_784>",
+    "<SPECIAL_785>",
+    "<SPECIAL_786>",
+    "<SPECIAL_787>",
+    "<SPECIAL_788>",
+    "<SPECIAL_789>",
+    "<SPECIAL_790>",
+    "<SPECIAL_791>",
+    "<SPECIAL_792>",
+    "<SPECIAL_793>",
+    "<SPECIAL_794>",
+    "<SPECIAL_795>",
+    "<SPECIAL_796>",
+    "<SPECIAL_797>",
+    "<SPECIAL_798>",
+    "<SPECIAL_799>",
+    "<SPECIAL_800>",
+    "<SPECIAL_801>",
+    "<SPECIAL_802>",
+    "<SPECIAL_803>",
+    "<SPECIAL_804>",
+    "<SPECIAL_805>",
+    "<SPECIAL_806>",
+    "<SPECIAL_807>",
+    "<SPECIAL_808>",
+    "<SPECIAL_809>",
+    "<SPECIAL_810>",
+    "<SPECIAL_811>",
+    "<SPECIAL_812>",
+    "<SPECIAL_813>",
+    "<SPECIAL_814>",
+    "<SPECIAL_815>",
+    "<SPECIAL_816>",
+    "<SPECIAL_817>",
+    "<SPECIAL_818>",
+    "<SPECIAL_819>",
+    "<SPECIAL_820>",
+    "<SPECIAL_821>",
+    "<SPECIAL_822>",
+    "<SPECIAL_823>",
+    "<SPECIAL_824>",
+    "<SPECIAL_825>",
+    "<SPECIAL_826>",
+    "<SPECIAL_827>",
+    "<SPECIAL_828>",
+    "<SPECIAL_829>",
+    "<SPECIAL_830>",
+    "<SPECIAL_831>",
+    "<SPECIAL_832>",
+    "<SPECIAL_833>",
+    "<SPECIAL_834>",
+    "<SPECIAL_835>",
+    "<SPECIAL_836>",
+    "<SPECIAL_837>",
+    "<SPECIAL_838>",
+    "<SPECIAL_839>",
+    "<SPECIAL_840>",
+    "<SPECIAL_841>",
+    "<SPECIAL_842>",
+    "<SPECIAL_843>",
+    "<SPECIAL_844>",
+    "<SPECIAL_845>",
+    "<SPECIAL_846>",
+    "<SPECIAL_847>",
+    "<SPECIAL_848>",
+    "<SPECIAL_849>",
+    "<SPECIAL_850>",
+    "<SPECIAL_851>",
+    "<SPECIAL_852>",
+    "<SPECIAL_853>",
+    "<SPECIAL_854>",
+    "<SPECIAL_855>",
+    "<SPECIAL_856>",
+    "<SPECIAL_857>",
+    "<SPECIAL_858>",
+    "<SPECIAL_859>",
+    "<SPECIAL_860>",
+    "<SPECIAL_861>",
+    "<SPECIAL_862>",
+    "<SPECIAL_863>",
+    "<SPECIAL_864>",
+    "<SPECIAL_865>",
+    "<SPECIAL_866>",
+    "<SPECIAL_867>",
+    "<SPECIAL_868>",
+    "<SPECIAL_869>",
+    "<SPECIAL_870>",
+    "<SPECIAL_871>",
+    "<SPECIAL_872>",
+    "<SPECIAL_873>",
+    "<SPECIAL_874>",
+    "<SPECIAL_875>",
+    "<SPECIAL_876>",
+    "<SPECIAL_877>",
+    "<SPECIAL_878>",
+    "<SPECIAL_879>",
+    "<SPECIAL_880>",
+    "<SPECIAL_881>",
+    "<SPECIAL_882>",
+    "<SPECIAL_883>",
+    "<SPECIAL_884>",
+    "<SPECIAL_885>",
+    "<SPECIAL_886>",
+    "<SPECIAL_887>",
+    "<SPECIAL_888>",
+    "<SPECIAL_889>",
+    "<SPECIAL_890>",
+    "<SPECIAL_891>",
+    "<SPECIAL_892>",
+    "<SPECIAL_893>",
+    "<SPECIAL_894>",
+    "<SPECIAL_895>",
+    "<SPECIAL_896>",
+    "<SPECIAL_897>",
+    "<SPECIAL_898>",
+    "<SPECIAL_899>",
+    "<SPECIAL_900>",
+    "<SPECIAL_901>",
+    "<SPECIAL_902>",
+    "<SPECIAL_903>",
+    "<SPECIAL_904>",
+    "<SPECIAL_905>",
+    "<SPECIAL_906>",
+    "<SPECIAL_907>",
+    "<SPECIAL_908>",
+    "<SPECIAL_909>",
+    "<SPECIAL_910>",
+    "<SPECIAL_911>",
+    "<SPECIAL_912>",
+    "<SPECIAL_913>",
+    "<SPECIAL_914>",
+    "<SPECIAL_915>",
+    "<SPECIAL_916>",
+    "<SPECIAL_917>",
+    "<SPECIAL_918>",
+    "<SPECIAL_919>",
+    "<SPECIAL_920>",
+    "<SPECIAL_921>",
+    "<SPECIAL_922>",
+    "<SPECIAL_923>",
+    "<SPECIAL_924>",
+    "<SPECIAL_925>",
+    "<SPECIAL_926>",
+    "<SPECIAL_927>",
+    "<SPECIAL_928>",
+    "<SPECIAL_929>",
+    "<SPECIAL_930>",
+    "<SPECIAL_931>",
+    "<SPECIAL_932>",
+    "<SPECIAL_933>",
+    "<SPECIAL_934>",
+    "<SPECIAL_935>",
+    "<SPECIAL_936>",
+    "<SPECIAL_937>",
+    "<SPECIAL_938>",
+    "<SPECIAL_939>",
+    "<SPECIAL_940>",
+    "<SPECIAL_941>",
+    "<SPECIAL_942>",
+    "<SPECIAL_943>",
+    "<SPECIAL_944>",
+    "<SPECIAL_945>",
+    "<SPECIAL_946>",
+    "<SPECIAL_947>",
+    "<SPECIAL_948>",
+    "<SPECIAL_949>",
+    "<SPECIAL_950>",
+    "<SPECIAL_951>",
+    "<SPECIAL_952>",
+    "<SPECIAL_953>",
+    "<SPECIAL_954>",
+    "<SPECIAL_955>",
+    "<SPECIAL_956>",
+    "<SPECIAL_957>",
+    "<SPECIAL_958>",
+    "<SPECIAL_959>",
+    "<SPECIAL_960>",
+    "<SPECIAL_961>",
+    "<SPECIAL_962>",
+    "<SPECIAL_963>",
+    "<SPECIAL_964>",
+    "<SPECIAL_965>",
+    "<SPECIAL_966>",
+    "<SPECIAL_967>",
+    "<SPECIAL_968>",
+    "<SPECIAL_969>",
+    "<SPECIAL_970>",
+    "<SPECIAL_971>",
+    "<SPECIAL_972>",
+    "<SPECIAL_973>",
+    "<SPECIAL_974>",
+    "<SPECIAL_975>",
+    "<SPECIAL_976>",
+    "<SPECIAL_977>",
+    "<SPECIAL_978>",
+    "<SPECIAL_979>",
+    "<SPECIAL_980>",
+    "<SPECIAL_981>",
+    "<SPECIAL_982>",
+    "<SPECIAL_983>",
+    "<SPECIAL_984>",
+    "<SPECIAL_985>",
+    "<SPECIAL_986>",
+    "<SPECIAL_987>",
+    "<SPECIAL_988>",
+    "<SPECIAL_989>",
+    "<SPECIAL_990>",
+    "<SPECIAL_991>",
+    "<SPECIAL_992>",
+    "<SPECIAL_993>",
+    "<SPECIAL_994>",
+    "<SPECIAL_995>",
+    "<SPECIAL_996>",
+    "<SPECIAL_997>",
+    "<SPECIAL_998>",
+    "<SPECIAL_999>"
+  ],
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a70ffa5b396383f0cb6d248a40f51dc823cf1eff52b33d8b6bd363a71583a5d0
+size 17078136

tokenizer_config.json ADDED Viewed

The diff for this file is too large to render. See raw diff