cs2764 committed on
Commit 5097ccc · verified · Parent: 9799579

Upload MLX converted model with quantization settings

Files changed (4)
  1. README.md +8 -1
  2. chat_template.jinja +31 -45
  3. config.json +2 -746
  4. qwen3coder_tool_parser.py +320 -306
README.md CHANGED
@@ -10,7 +10,14 @@ tags:
10
 
11
  # cs2764/Qwen3-Coder-480B-A35B-Instruct-mlx-mixed_4_6
12
 
13
- The Model [cs2764/Qwen3-Coder-480B-A35B-Instruct-mlx-mixed_4_6](https://huggingface.co/cs2764/Qwen3-Coder-480B-A35B-Instruct-mlx-mixed_4_6) was converted to MLX format from [Qwen/Qwen3-Coder-480B-A35B-Instruct](https://huggingface.co/Qwen/Qwen3-Coder-480B-A35B-Instruct) using mlx-lm version **0.26.2**.
14
 
15
  ## Use with mlx
16
 
 
10
 
11
  # cs2764/Qwen3-Coder-480B-A35B-Instruct-mlx-mixed_4_6
12
 
13
+ The Model [cs2764/Qwen3-Coder-480B-A35B-Instruct-mlx-mixed_4_6](https://huggingface.co/cs2764/Qwen3-Coder-480B-A35B-Instruct-mlx-mixed_4_6) was converted to MLX format from [Qwen/Qwen3-Coder-480B-A35B-Instruct](https://huggingface.co/Qwen/Qwen3-Coder-480B-A35B-Instruct) using mlx-lm version **0.28.0**.
14
+
15
+ ## Quantization Details
16
+
17
+ This model was converted with the following quantization settings:
18
+
19
+ - **Quantization Strategy**: mixed_4_6 (Mixed precision)
20
+ - **Average bits per weight**: 4.819
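The settings above correspond to mlx-lm's mixed-precision recipes, where most weights are quantized to 4 bits and selected modules are kept at 6 bits with group size 64 (as recorded in config.json below). A rough sketch of how such a conversion is typically invoked follows; the keyword arguments mirror mlx-lm's `convert` API, and the string recipe name `"mixed_4_6"` is an assumption inferred from the model name, not a record of the exact command used for this upload.

```python
# Hypothetical reconstruction of the conversion step (assumptions noted above).
from mlx_lm import convert

convert(
    hf_path="Qwen/Qwen3-Coder-480B-A35B-Instruct",
    mlx_path="Qwen3-Coder-480B-A35B-Instruct-mlx-mixed_4_6",
    quantize=True,
    q_group_size=64,               # matches the group_size recorded in config.json
    q_bits=4,                      # base precision; selected modules stay at 6 bits
    quant_predicate="mixed_4_6",   # assumed recipe name (exposed as --quant-predicate on the CLI)
)
```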
21
 
22
  ## Use with mlx
23
 
chat_template.jinja CHANGED
@@ -1,17 +1,12 @@
1
- {% macro render_item_list(item_list, tag_name='required') %}
2
- {%- if item_list is defined and item_list is iterable and item_list | length > 0 %}
3
- {%- if tag_name %}{{- '\n<' ~ tag_name ~ '>' -}}{% endif %}
4
- {{- '[' }}
5
- {%- for item in item_list -%}
6
- {%- if loop.index > 1 %}{{- ", "}}{% endif -%}
7
- {%- if item is string -%}
8
- {{ "`" ~ item ~ "`" }}
9
- {%- else -%}
10
- {{ item }}
11
- {%- endif -%}
12
- {%- endfor -%}
13
- {{- ']' }}
14
- {%- if tag_name %}{{- '</' ~ tag_name ~ '>' -}}{% endif %}
15
  {%- endif %}
16
  {% endmacro %}
17
 
@@ -34,46 +29,37 @@
34
  {%- endif %}
35
  {%- endif %}
36
  {%- if tools is iterable and tools | length > 0 %}
37
- {{- "\n\nYou have access to the following functions:\n\n" }}
38
  {{- "<tools>" }}
39
  {%- for tool in tools %}
40
  {%- if tool.function is defined %}
41
  {%- set tool = tool.function %}
42
  {%- endif %}
43
  {{- "\n<function>\n<name>" ~ tool.name ~ "</name>" }}
44
- {{- '\n<description>' ~ (tool.description | trim) ~ '</description>' }}
 
 
45
  {{- '\n<parameters>' }}
46
- {%- for param_name, param_fields in tool.parameters.properties|items %}
47
- {{- '\n<parameter>' }}
48
- {{- '\n<name>' ~ param_name ~ '</name>' }}
49
- {%- if param_fields.type is defined %}
50
- {{- '\n<type>' ~ (param_fields.type | string) ~ '</type>' }}
51
- {%- endif %}
52
- {%- if param_fields.description is defined %}
53
- {{- '\n<description>' ~ (param_fields.description | trim) ~ '</description>' }}
54
- {%- endif %}
55
- {{- render_item_list(param_fields.enum, 'enum') }}
56
- {%- set handled_keys = ['type', 'description', 'enum', 'required'] %}
57
- {%- for json_key in param_fields.keys() | reject("in", handled_keys) %}
58
- {%- set normed_json_key = json_key | replace("-", "_") | replace(" ", "_") | replace("$", "") %}
59
- {%- if param_fields[json_key] is mapping %}
60
- {{- '\n<' ~ normed_json_key ~ '>' ~ (param_fields[json_key] | tojson | safe) ~ '</' ~ normed_json_key ~ '>' }}
61
- {%- else %}
62
- {{-'\n<' ~ normed_json_key ~ '>' ~ (param_fields[json_key] | string) ~ '</' ~ normed_json_key ~ '>' }}
63
  {%- endif %}
64
  {%- endfor %}
65
- {{- render_item_list(param_fields.required, 'required') }}
66
- {{- '\n</parameter>' }}
67
- {%- endfor %}
68
- {{- render_item_list(tool.parameters.required, 'required') }}
69
- {{- '\n</parameters>' }}
70
- {%- if tool.return is defined %}
71
- {%- if tool.return is mapping %}
72
- {{- '\n<return>' ~ (tool.return | tojson | safe) ~ '</return>' }}
73
- {%- else %}
74
- {{- '\n<return>' ~ (tool.return | string) ~ '</return>' }}
75
- {%- endif %}
76
  {%- endif %}
77
  {{- '\n</function>' }}
78
  {%- endfor %}
79
  {{- "\n</tools>" }}
@@ -100,7 +86,7 @@
100
  {%- if tool_call.arguments is defined %}
101
  {%- for args_name, args_value in tool_call.arguments|items %}
102
  {{- '<parameter=' + args_name + '>\n' }}
103
- {%- set args_value = args_value if args_value is string else args_value | string %}
104
  {{- args_value }}
105
  {{- '\n</parameter>\n' }}
106
  {%- endfor %}
 
1
+ {% macro render_extra_keys(json_dict, handled_keys) %}
2
+ {%- if json_dict is mapping %}
3
+ {%- for json_key in json_dict if json_key not in handled_keys %}
4
+ {%- if json_dict[json_key] is mapping or (json_dict[json_key] is sequence and json_dict[json_key] is not string) %}
5
+ {{- '\n<' ~ json_key ~ '>' ~ (json_dict[json_key] | tojson | safe) ~ '</' ~ json_key ~ '>' }}
6
+ {%- else %}
7
+ {{-'\n<' ~ json_key ~ '>' ~ (json_dict[json_key] | string) ~ '</' ~ json_key ~ '>' }}
8
+ {%- endif %}
9
+ {%- endfor %}
10
  {%- endif %}
11
  {% endmacro %}
12
 
 
29
  {%- endif %}
30
  {%- endif %}
31
  {%- if tools is iterable and tools | length > 0 %}
32
+ {{- "\n\n# Tools\n\nYou have access to the following functions:\n\n" }}
33
  {{- "<tools>" }}
34
  {%- for tool in tools %}
35
  {%- if tool.function is defined %}
36
  {%- set tool = tool.function %}
37
  {%- endif %}
38
  {{- "\n<function>\n<name>" ~ tool.name ~ "</name>" }}
39
+ {%- if tool.description is defined %}
40
+ {{- '\n<description>' ~ (tool.description | trim) ~ '</description>' }}
41
+ {%- endif %}
42
  {{- '\n<parameters>' }}
43
+ {%- if tool.parameters is defined and tool.parameters is mapping and tool.parameters.properties is defined and tool.parameters.properties is mapping %}
44
+ {%- for param_name, param_fields in tool.parameters.properties|items %}
45
+ {{- '\n<parameter>' }}
46
+ {{- '\n<name>' ~ param_name ~ '</name>' }}
47
+ {%- if param_fields.type is defined %}
48
+ {{- '\n<type>' ~ (param_fields.type | string) ~ '</type>' }}
49
  {%- endif %}
50
+ {%- if param_fields.description is defined %}
51
+ {{- '\n<description>' ~ (param_fields.description | trim) ~ '</description>' }}
52
+ {%- endif %}
53
+ {%- set handled_keys = ['name', 'type', 'description'] %}
54
+ {{- render_extra_keys(param_fields, handled_keys) }}
55
+ {{- '\n</parameter>' }}
56
  {%- endfor %}
57
  {%- endif %}
58
+ {% set handled_keys = ['type', 'properties'] %}
59
+ {{- render_extra_keys(tool.parameters, handled_keys) }}
60
+ {{- '\n</parameters>' }}
61
+ {%- set handled_keys = ['type', 'name', 'description', 'parameters'] %}
62
+ {{- render_extra_keys(tool, handled_keys) }}
63
  {{- '\n</function>' }}
64
  {%- endfor %}
65
  {{- "\n</tools>" }}
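The added lines above control how tool (function) schemas are rendered into the `<tools>` block of the prompt, including the new `# Tools` header and the `render_extra_keys` handling of schema fields such as `required`. One way to inspect the rendered result is to apply the chat template through transformers; this is an illustrative sketch and assumes a transformers version recent enough to use the updated template shipped with this repo:

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("cs2764/Qwen3-Coder-480B-A35B-Instruct-mlx-mixed_4_6")

# A sample OpenAI-style tool definition (names and fields here are illustrative).
tools = [{
    "type": "function",
    "function": {
        "name": "read_file",
        "description": "Read a text file from disk.",
        "parameters": {
            "type": "object",
            "properties": {
                "path": {"type": "string", "description": "File path to read."}
            },
            "required": ["path"],
        },
    },
}]

messages = [{"role": "user", "content": "Show me src/main.py"}]
prompt = tok.apply_chat_template(messages, tools=tools, tokenize=False, add_generation_prompt=True)
print(prompt)  # contains the <tools>/<function>/<parameters> block built by the template above
```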
 
86
  {%- if tool_call.arguments is defined %}
87
  {%- for args_name, args_value in tool_call.arguments|items %}
88
  {{- '<parameter=' + args_name + '>\n' }}
89
+ {%- set args_value = args_value | tojson | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}
90
  {{- args_value }}
91
  {{- '\n</parameter>\n' }}
92
  {%- endfor %}
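The main behavioral change at the end of the template is how structured tool-call arguments are serialized: mappings and non-string sequences now go through `tojson`, while scalars are simply stringified. A small Python re-implementation of that rule (illustrative only, not part of this repo) shows the effect:

```python
import json

def render_parameter(name, value):
    """Mirror the template's updated rule for tool-call arguments:
    dicts/lists are JSON-encoded, everything else is stringified."""
    if isinstance(value, (dict, list, tuple)):
        rendered = json.dumps(value)
    else:
        rendered = str(value)
    return f"<parameter={name}>\n{rendered}\n</parameter>\n"

print(render_parameter("path", "src/main.py"))
# <parameter=path>
# src/main.py
# </parameter>

print(render_parameter("options", {"recursive": True, "max_depth": 3}))
# <parameter=options>
# {"recursive": true, "max_depth": 3}
# </parameter>
```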
config.json CHANGED
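The quantization metadata below lists a `group_size`/`bits` entry for every quantized module (4-bit for most projections, 6-bit for selected ones) and drops the norm/rope entries that the previous config recorded as `false`. A short sketch that tallies these per-module overrides from the shipped config.json (path assumed to be a local copy of this repo's file):

```python
import json
from collections import Counter

# Assumes a local copy of this repo's config.json.
with open("config.json") as f:
    config = json.load(f)

quant = config["quantization"]
bits_per_module = Counter()

for key, value in quant.items():
    if isinstance(value, dict) and "bits" in value:
        bits_per_module[value["bits"]] += 1

print("default:", {"group_size": quant["group_size"], "bits": quant["bits"]})
print("per-module overrides by bit-width:", dict(bits_per_module))
```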
@@ -26,6 +26,7 @@
26
  "quantization": {
27
  "group_size": 64,
28
  "bits": 4,
 
29
  "model.embed_tokens": {
30
  "group_size": 64,
31
  "bits": 4
@@ -46,11 +47,6 @@
46
  "group_size": 64,
47
  "bits": 4
48
  },
49
- "model.layers.0.self_attn.q_norm": false,
50
- "model.layers.0.self_attn.k_norm": false,
51
- "model.layers.0.self_attn.rope": false,
52
- "model.layers.0.input_layernorm": false,
53
- "model.layers.0.post_attention_layernorm": false,
54
  "model.layers.0.mlp.gate": {
55
  "group_size": 64,
56
  "bits": 4
@@ -67,7 +63,6 @@
67
  "group_size": 64,
68
  "bits": 6
69
  },
70
- "model.layers.0.mlp.switch_mlp.activation": false,
71
  "model.layers.1.self_attn.q_proj": {
72
  "group_size": 64,
73
  "bits": 4
@@ -84,11 +79,6 @@
84
  "group_size": 64,
85
  "bits": 4
86
  },
87
- "model.layers.1.self_attn.q_norm": false,
88
- "model.layers.1.self_attn.k_norm": false,
89
- "model.layers.1.self_attn.rope": false,
90
- "model.layers.1.input_layernorm": false,
91
- "model.layers.1.post_attention_layernorm": false,
92
  "model.layers.1.mlp.gate": {
93
  "group_size": 64,
94
  "bits": 4
@@ -105,7 +95,6 @@
105
  "group_size": 64,
106
  "bits": 6
107
  },
108
- "model.layers.1.mlp.switch_mlp.activation": false,
109
  "model.layers.2.self_attn.q_proj": {
110
  "group_size": 64,
111
  "bits": 4
@@ -122,11 +111,6 @@
122
  "group_size": 64,
123
  "bits": 4
124
  },
125
- "model.layers.2.self_attn.q_norm": false,
126
- "model.layers.2.self_attn.k_norm": false,
127
- "model.layers.2.self_attn.rope": false,
128
- "model.layers.2.input_layernorm": false,
129
- "model.layers.2.post_attention_layernorm": false,
130
  "model.layers.2.mlp.gate": {
131
  "group_size": 64,
132
  "bits": 4
@@ -143,7 +127,6 @@
143
  "group_size": 64,
144
  "bits": 6
145
  },
146
- "model.layers.2.mlp.switch_mlp.activation": false,
147
  "model.layers.3.self_attn.q_proj": {
148
  "group_size": 64,
149
  "bits": 4
@@ -160,11 +143,6 @@
160
  "group_size": 64,
161
  "bits": 4
162
  },
163
- "model.layers.3.self_attn.q_norm": false,
164
- "model.layers.3.self_attn.k_norm": false,
165
- "model.layers.3.self_attn.rope": false,
166
- "model.layers.3.input_layernorm": false,
167
- "model.layers.3.post_attention_layernorm": false,
168
  "model.layers.3.mlp.gate": {
169
  "group_size": 64,
170
  "bits": 4
@@ -181,7 +159,6 @@
181
  "group_size": 64,
182
  "bits": 6
183
  },
184
- "model.layers.3.mlp.switch_mlp.activation": false,
185
  "model.layers.4.self_attn.q_proj": {
186
  "group_size": 64,
187
  "bits": 4
@@ -198,11 +175,6 @@
198
  "group_size": 64,
199
  "bits": 4
200
  },
201
- "model.layers.4.self_attn.q_norm": false,
202
- "model.layers.4.self_attn.k_norm": false,
203
- "model.layers.4.self_attn.rope": false,
204
- "model.layers.4.input_layernorm": false,
205
- "model.layers.4.post_attention_layernorm": false,
206
  "model.layers.4.mlp.gate": {
207
  "group_size": 64,
208
  "bits": 4
@@ -219,7 +191,6 @@
219
  "group_size": 64,
220
  "bits": 6
221
  },
222
- "model.layers.4.mlp.switch_mlp.activation": false,
223
  "model.layers.5.self_attn.q_proj": {
224
  "group_size": 64,
225
  "bits": 4
@@ -236,11 +207,6 @@
236
  "group_size": 64,
237
  "bits": 4
238
  },
239
- "model.layers.5.self_attn.q_norm": false,
240
- "model.layers.5.self_attn.k_norm": false,
241
- "model.layers.5.self_attn.rope": false,
242
- "model.layers.5.input_layernorm": false,
243
- "model.layers.5.post_attention_layernorm": false,
244
  "model.layers.5.mlp.gate": {
245
  "group_size": 64,
246
  "bits": 4
@@ -257,7 +223,6 @@
257
  "group_size": 64,
258
  "bits": 6
259
  },
260
- "model.layers.5.mlp.switch_mlp.activation": false,
261
  "model.layers.6.self_attn.q_proj": {
262
  "group_size": 64,
263
  "bits": 4
@@ -274,11 +239,6 @@
274
  "group_size": 64,
275
  "bits": 4
276
  },
277
- "model.layers.6.self_attn.q_norm": false,
278
- "model.layers.6.self_attn.k_norm": false,
279
- "model.layers.6.self_attn.rope": false,
280
- "model.layers.6.input_layernorm": false,
281
- "model.layers.6.post_attention_layernorm": false,
282
  "model.layers.6.mlp.gate": {
283
  "group_size": 64,
284
  "bits": 4
@@ -295,7 +255,6 @@
295
  "group_size": 64,
296
  "bits": 6
297
  },
298
- "model.layers.6.mlp.switch_mlp.activation": false,
299
  "model.layers.7.self_attn.q_proj": {
300
  "group_size": 64,
301
  "bits": 4
@@ -312,11 +271,6 @@
312
  "group_size": 64,
313
  "bits": 4
314
  },
315
- "model.layers.7.self_attn.q_norm": false,
316
- "model.layers.7.self_attn.k_norm": false,
317
- "model.layers.7.self_attn.rope": false,
318
- "model.layers.7.input_layernorm": false,
319
- "model.layers.7.post_attention_layernorm": false,
320
  "model.layers.7.mlp.gate": {
321
  "group_size": 64,
322
  "bits": 4
@@ -333,7 +287,6 @@
333
  "group_size": 64,
334
  "bits": 4
335
  },
336
- "model.layers.7.mlp.switch_mlp.activation": false,
337
  "model.layers.8.self_attn.q_proj": {
338
  "group_size": 64,
339
  "bits": 4
@@ -350,11 +303,6 @@
350
  "group_size": 64,
351
  "bits": 4
352
  },
353
- "model.layers.8.self_attn.q_norm": false,
354
- "model.layers.8.self_attn.k_norm": false,
355
- "model.layers.8.self_attn.rope": false,
356
- "model.layers.8.input_layernorm": false,
357
- "model.layers.8.post_attention_layernorm": false,
358
  "model.layers.8.mlp.gate": {
359
  "group_size": 64,
360
  "bits": 4
@@ -371,7 +319,6 @@
371
  "group_size": 64,
372
  "bits": 4
373
  },
374
- "model.layers.8.mlp.switch_mlp.activation": false,
375
  "model.layers.9.self_attn.q_proj": {
376
  "group_size": 64,
377
  "bits": 4
@@ -388,11 +335,6 @@
388
  "group_size": 64,
389
  "bits": 4
390
  },
391
- "model.layers.9.self_attn.q_norm": false,
392
- "model.layers.9.self_attn.k_norm": false,
393
- "model.layers.9.self_attn.rope": false,
394
- "model.layers.9.input_layernorm": false,
395
- "model.layers.9.post_attention_layernorm": false,
396
  "model.layers.9.mlp.gate": {
397
  "group_size": 64,
398
  "bits": 4
@@ -409,7 +351,6 @@
409
  "group_size": 64,
410
  "bits": 6
411
  },
412
- "model.layers.9.mlp.switch_mlp.activation": false,
413
  "model.layers.10.self_attn.q_proj": {
414
  "group_size": 64,
415
  "bits": 4
@@ -426,11 +367,6 @@
426
  "group_size": 64,
427
  "bits": 4
428
  },
429
- "model.layers.10.self_attn.q_norm": false,
430
- "model.layers.10.self_attn.k_norm": false,
431
- "model.layers.10.self_attn.rope": false,
432
- "model.layers.10.input_layernorm": false,
433
- "model.layers.10.post_attention_layernorm": false,
434
  "model.layers.10.mlp.gate": {
435
  "group_size": 64,
436
  "bits": 4
@@ -447,7 +383,6 @@
447
  "group_size": 64,
448
  "bits": 4
449
  },
450
- "model.layers.10.mlp.switch_mlp.activation": false,
451
  "model.layers.11.self_attn.q_proj": {
452
  "group_size": 64,
453
  "bits": 4
@@ -464,11 +399,6 @@
464
  "group_size": 64,
465
  "bits": 4
466
  },
467
- "model.layers.11.self_attn.q_norm": false,
468
- "model.layers.11.self_attn.k_norm": false,
469
- "model.layers.11.self_attn.rope": false,
470
- "model.layers.11.input_layernorm": false,
471
- "model.layers.11.post_attention_layernorm": false,
472
  "model.layers.11.mlp.gate": {
473
  "group_size": 64,
474
  "bits": 4
@@ -485,7 +415,6 @@
485
  "group_size": 64,
486
  "bits": 4
487
  },
488
- "model.layers.11.mlp.switch_mlp.activation": false,
489
  "model.layers.12.self_attn.q_proj": {
490
  "group_size": 64,
491
  "bits": 4
@@ -502,11 +431,6 @@
502
  "group_size": 64,
503
  "bits": 4
504
  },
505
- "model.layers.12.self_attn.q_norm": false,
506
- "model.layers.12.self_attn.k_norm": false,
507
- "model.layers.12.self_attn.rope": false,
508
- "model.layers.12.input_layernorm": false,
509
- "model.layers.12.post_attention_layernorm": false,
510
  "model.layers.12.mlp.gate": {
511
  "group_size": 64,
512
  "bits": 4
@@ -523,7 +447,6 @@
523
  "group_size": 64,
524
  "bits": 6
525
  },
526
- "model.layers.12.mlp.switch_mlp.activation": false,
527
  "model.layers.13.self_attn.q_proj": {
528
  "group_size": 64,
529
  "bits": 4
@@ -540,11 +463,6 @@
540
  "group_size": 64,
541
  "bits": 4
542
  },
543
- "model.layers.13.self_attn.q_norm": false,
544
- "model.layers.13.self_attn.k_norm": false,
545
- "model.layers.13.self_attn.rope": false,
546
- "model.layers.13.input_layernorm": false,
547
- "model.layers.13.post_attention_layernorm": false,
548
  "model.layers.13.mlp.gate": {
549
  "group_size": 64,
550
  "bits": 4
@@ -561,7 +479,6 @@
561
  "group_size": 64,
562
  "bits": 4
563
  },
564
- "model.layers.13.mlp.switch_mlp.activation": false,
565
  "model.layers.14.self_attn.q_proj": {
566
  "group_size": 64,
567
  "bits": 4
@@ -578,11 +495,6 @@
578
  "group_size": 64,
579
  "bits": 4
580
  },
581
- "model.layers.14.self_attn.q_norm": false,
582
- "model.layers.14.self_attn.k_norm": false,
583
- "model.layers.14.self_attn.rope": false,
584
- "model.layers.14.input_layernorm": false,
585
- "model.layers.14.post_attention_layernorm": false,
586
  "model.layers.14.mlp.gate": {
587
  "group_size": 64,
588
  "bits": 4
@@ -599,7 +511,6 @@
599
  "group_size": 64,
600
  "bits": 4
601
  },
602
- "model.layers.14.mlp.switch_mlp.activation": false,
603
  "model.layers.15.self_attn.q_proj": {
604
  "group_size": 64,
605
  "bits": 4
@@ -616,11 +527,6 @@
616
  "group_size": 64,
617
  "bits": 4
618
  },
619
- "model.layers.15.self_attn.q_norm": false,
620
- "model.layers.15.self_attn.k_norm": false,
621
- "model.layers.15.self_attn.rope": false,
622
- "model.layers.15.input_layernorm": false,
623
- "model.layers.15.post_attention_layernorm": false,
624
  "model.layers.15.mlp.gate": {
625
  "group_size": 64,
626
  "bits": 4
@@ -637,7 +543,6 @@
637
  "group_size": 64,
638
  "bits": 6
639
  },
640
- "model.layers.15.mlp.switch_mlp.activation": false,
641
  "model.layers.16.self_attn.q_proj": {
642
  "group_size": 64,
643
  "bits": 4
@@ -654,11 +559,6 @@
654
  "group_size": 64,
655
  "bits": 4
656
  },
657
- "model.layers.16.self_attn.q_norm": false,
658
- "model.layers.16.self_attn.k_norm": false,
659
- "model.layers.16.self_attn.rope": false,
660
- "model.layers.16.input_layernorm": false,
661
- "model.layers.16.post_attention_layernorm": false,
662
  "model.layers.16.mlp.gate": {
663
  "group_size": 64,
664
  "bits": 4
@@ -675,7 +575,6 @@
675
  "group_size": 64,
676
  "bits": 4
677
  },
678
- "model.layers.16.mlp.switch_mlp.activation": false,
679
  "model.layers.17.self_attn.q_proj": {
680
  "group_size": 64,
681
  "bits": 4
@@ -692,11 +591,6 @@
692
  "group_size": 64,
693
  "bits": 4
694
  },
695
- "model.layers.17.self_attn.q_norm": false,
696
- "model.layers.17.self_attn.k_norm": false,
697
- "model.layers.17.self_attn.rope": false,
698
- "model.layers.17.input_layernorm": false,
699
- "model.layers.17.post_attention_layernorm": false,
700
  "model.layers.17.mlp.gate": {
701
  "group_size": 64,
702
  "bits": 4
@@ -713,7 +607,6 @@
713
  "group_size": 64,
714
  "bits": 4
715
  },
716
- "model.layers.17.mlp.switch_mlp.activation": false,
717
  "model.layers.18.self_attn.q_proj": {
718
  "group_size": 64,
719
  "bits": 4
@@ -730,11 +623,6 @@
730
  "group_size": 64,
731
  "bits": 4
732
  },
733
- "model.layers.18.self_attn.q_norm": false,
734
- "model.layers.18.self_attn.k_norm": false,
735
- "model.layers.18.self_attn.rope": false,
736
- "model.layers.18.input_layernorm": false,
737
- "model.layers.18.post_attention_layernorm": false,
738
  "model.layers.18.mlp.gate": {
739
  "group_size": 64,
740
  "bits": 4
@@ -751,7 +639,6 @@
751
  "group_size": 64,
752
  "bits": 6
753
  },
754
- "model.layers.18.mlp.switch_mlp.activation": false,
755
  "model.layers.19.self_attn.q_proj": {
756
  "group_size": 64,
757
  "bits": 4
@@ -768,11 +655,6 @@
768
  "group_size": 64,
769
  "bits": 4
770
  },
771
- "model.layers.19.self_attn.q_norm": false,
772
- "model.layers.19.self_attn.k_norm": false,
773
- "model.layers.19.self_attn.rope": false,
774
- "model.layers.19.input_layernorm": false,
775
- "model.layers.19.post_attention_layernorm": false,
776
  "model.layers.19.mlp.gate": {
777
  "group_size": 64,
778
  "bits": 4
@@ -789,7 +671,6 @@
789
  "group_size": 64,
790
  "bits": 4
791
  },
792
- "model.layers.19.mlp.switch_mlp.activation": false,
793
  "model.layers.20.self_attn.q_proj": {
794
  "group_size": 64,
795
  "bits": 4
@@ -806,11 +687,6 @@
806
  "group_size": 64,
807
  "bits": 4
808
  },
809
- "model.layers.20.self_attn.q_norm": false,
810
- "model.layers.20.self_attn.k_norm": false,
811
- "model.layers.20.self_attn.rope": false,
812
- "model.layers.20.input_layernorm": false,
813
- "model.layers.20.post_attention_layernorm": false,
814
  "model.layers.20.mlp.gate": {
815
  "group_size": 64,
816
  "bits": 4
@@ -827,7 +703,6 @@
827
  "group_size": 64,
828
  "bits": 4
829
  },
830
- "model.layers.20.mlp.switch_mlp.activation": false,
831
  "model.layers.21.self_attn.q_proj": {
832
  "group_size": 64,
833
  "bits": 4
@@ -844,11 +719,6 @@
844
  "group_size": 64,
845
  "bits": 4
846
  },
847
- "model.layers.21.self_attn.q_norm": false,
848
- "model.layers.21.self_attn.k_norm": false,
849
- "model.layers.21.self_attn.rope": false,
850
- "model.layers.21.input_layernorm": false,
851
- "model.layers.21.post_attention_layernorm": false,
852
  "model.layers.21.mlp.gate": {
853
  "group_size": 64,
854
  "bits": 4
@@ -865,7 +735,6 @@
865
  "group_size": 64,
866
  "bits": 6
867
  },
868
- "model.layers.21.mlp.switch_mlp.activation": false,
869
  "model.layers.22.self_attn.q_proj": {
870
  "group_size": 64,
871
  "bits": 4
@@ -882,11 +751,6 @@
882
  "group_size": 64,
883
  "bits": 4
884
  },
885
- "model.layers.22.self_attn.q_norm": false,
886
- "model.layers.22.self_attn.k_norm": false,
887
- "model.layers.22.self_attn.rope": false,
888
- "model.layers.22.input_layernorm": false,
889
- "model.layers.22.post_attention_layernorm": false,
890
  "model.layers.22.mlp.gate": {
891
  "group_size": 64,
892
  "bits": 4
@@ -903,7 +767,6 @@
903
  "group_size": 64,
904
  "bits": 4
905
  },
906
- "model.layers.22.mlp.switch_mlp.activation": false,
907
  "model.layers.23.self_attn.q_proj": {
908
  "group_size": 64,
909
  "bits": 4
@@ -920,11 +783,6 @@
920
  "group_size": 64,
921
  "bits": 4
922
  },
923
- "model.layers.23.self_attn.q_norm": false,
924
- "model.layers.23.self_attn.k_norm": false,
925
- "model.layers.23.self_attn.rope": false,
926
- "model.layers.23.input_layernorm": false,
927
- "model.layers.23.post_attention_layernorm": false,
928
  "model.layers.23.mlp.gate": {
929
  "group_size": 64,
930
  "bits": 4
@@ -941,7 +799,6 @@
941
  "group_size": 64,
942
  "bits": 4
943
  },
944
- "model.layers.23.mlp.switch_mlp.activation": false,
945
  "model.layers.24.self_attn.q_proj": {
946
  "group_size": 64,
947
  "bits": 4
@@ -958,11 +815,6 @@
958
  "group_size": 64,
959
  "bits": 4
960
  },
961
- "model.layers.24.self_attn.q_norm": false,
962
- "model.layers.24.self_attn.k_norm": false,
963
- "model.layers.24.self_attn.rope": false,
964
- "model.layers.24.input_layernorm": false,
965
- "model.layers.24.post_attention_layernorm": false,
966
  "model.layers.24.mlp.gate": {
967
  "group_size": 64,
968
  "bits": 4
@@ -979,7 +831,6 @@
979
  "group_size": 64,
980
  "bits": 6
981
  },
982
- "model.layers.24.mlp.switch_mlp.activation": false,
983
  "model.layers.25.self_attn.q_proj": {
984
  "group_size": 64,
985
  "bits": 4
@@ -996,11 +847,6 @@
996
  "group_size": 64,
997
  "bits": 4
998
  },
999
- "model.layers.25.self_attn.q_norm": false,
1000
- "model.layers.25.self_attn.k_norm": false,
1001
- "model.layers.25.self_attn.rope": false,
1002
- "model.layers.25.input_layernorm": false,
1003
- "model.layers.25.post_attention_layernorm": false,
1004
  "model.layers.25.mlp.gate": {
1005
  "group_size": 64,
1006
  "bits": 4
@@ -1017,7 +863,6 @@
1017
  "group_size": 64,
1018
  "bits": 4
1019
  },
1020
- "model.layers.25.mlp.switch_mlp.activation": false,
1021
  "model.layers.26.self_attn.q_proj": {
1022
  "group_size": 64,
1023
  "bits": 4
@@ -1034,11 +879,6 @@
1034
  "group_size": 64,
1035
  "bits": 4
1036
  },
1037
- "model.layers.26.self_attn.q_norm": false,
1038
- "model.layers.26.self_attn.k_norm": false,
1039
- "model.layers.26.self_attn.rope": false,
1040
- "model.layers.26.input_layernorm": false,
1041
- "model.layers.26.post_attention_layernorm": false,
1042
  "model.layers.26.mlp.gate": {
1043
  "group_size": 64,
1044
  "bits": 4
@@ -1055,7 +895,6 @@
1055
  "group_size": 64,
1056
  "bits": 4
1057
  },
1058
- "model.layers.26.mlp.switch_mlp.activation": false,
1059
  "model.layers.27.self_attn.q_proj": {
1060
  "group_size": 64,
1061
  "bits": 4
@@ -1072,11 +911,6 @@
1072
  "group_size": 64,
1073
  "bits": 4
1074
  },
1075
- "model.layers.27.self_attn.q_norm": false,
1076
- "model.layers.27.self_attn.k_norm": false,
1077
- "model.layers.27.self_attn.rope": false,
1078
- "model.layers.27.input_layernorm": false,
1079
- "model.layers.27.post_attention_layernorm": false,
1080
  "model.layers.27.mlp.gate": {
1081
  "group_size": 64,
1082
  "bits": 4
@@ -1093,7 +927,6 @@
1093
  "group_size": 64,
1094
  "bits": 6
1095
  },
1096
- "model.layers.27.mlp.switch_mlp.activation": false,
1097
  "model.layers.28.self_attn.q_proj": {
1098
  "group_size": 64,
1099
  "bits": 4
@@ -1110,11 +943,6 @@
1110
  "group_size": 64,
1111
  "bits": 4
1112
  },
1113
- "model.layers.28.self_attn.q_norm": false,
1114
- "model.layers.28.self_attn.k_norm": false,
1115
- "model.layers.28.self_attn.rope": false,
1116
- "model.layers.28.input_layernorm": false,
1117
- "model.layers.28.post_attention_layernorm": false,
1118
  "model.layers.28.mlp.gate": {
1119
  "group_size": 64,
1120
  "bits": 4
@@ -1131,7 +959,6 @@
1131
  "group_size": 64,
1132
  "bits": 4
1133
  },
1134
- "model.layers.28.mlp.switch_mlp.activation": false,
1135
  "model.layers.29.self_attn.q_proj": {
1136
  "group_size": 64,
1137
  "bits": 4
@@ -1148,11 +975,6 @@
1148
  "group_size": 64,
1149
  "bits": 4
1150
  },
1151
- "model.layers.29.self_attn.q_norm": false,
1152
- "model.layers.29.self_attn.k_norm": false,
1153
- "model.layers.29.self_attn.rope": false,
1154
- "model.layers.29.input_layernorm": false,
1155
- "model.layers.29.post_attention_layernorm": false,
1156
  "model.layers.29.mlp.gate": {
1157
  "group_size": 64,
1158
  "bits": 4
@@ -1169,7 +991,6 @@
1169
  "group_size": 64,
1170
  "bits": 4
1171
  },
1172
- "model.layers.29.mlp.switch_mlp.activation": false,
1173
  "model.layers.30.self_attn.q_proj": {
1174
  "group_size": 64,
1175
  "bits": 4
@@ -1186,11 +1007,6 @@
1186
  "group_size": 64,
1187
  "bits": 4
1188
  },
1189
- "model.layers.30.self_attn.q_norm": false,
1190
- "model.layers.30.self_attn.k_norm": false,
1191
- "model.layers.30.self_attn.rope": false,
1192
- "model.layers.30.input_layernorm": false,
1193
- "model.layers.30.post_attention_layernorm": false,
1194
  "model.layers.30.mlp.gate": {
1195
  "group_size": 64,
1196
  "bits": 4
@@ -1207,7 +1023,6 @@
1207
  "group_size": 64,
1208
  "bits": 6
1209
  },
1210
- "model.layers.30.mlp.switch_mlp.activation": false,
1211
  "model.layers.31.self_attn.q_proj": {
1212
  "group_size": 64,
1213
  "bits": 4
@@ -1224,11 +1039,6 @@
1224
  "group_size": 64,
1225
  "bits": 4
1226
  },
1227
- "model.layers.31.self_attn.q_norm": false,
1228
- "model.layers.31.self_attn.k_norm": false,
1229
- "model.layers.31.self_attn.rope": false,
1230
- "model.layers.31.input_layernorm": false,
1231
- "model.layers.31.post_attention_layernorm": false,
1232
  "model.layers.31.mlp.gate": {
1233
  "group_size": 64,
1234
  "bits": 4
@@ -1245,7 +1055,6 @@
1245
  "group_size": 64,
1246
  "bits": 4
1247
  },
1248
- "model.layers.31.mlp.switch_mlp.activation": false,
1249
  "model.layers.32.self_attn.q_proj": {
1250
  "group_size": 64,
1251
  "bits": 4
@@ -1262,11 +1071,6 @@
1262
  "group_size": 64,
1263
  "bits": 4
1264
  },
1265
- "model.layers.32.self_attn.q_norm": false,
1266
- "model.layers.32.self_attn.k_norm": false,
1267
- "model.layers.32.self_attn.rope": false,
1268
- "model.layers.32.input_layernorm": false,
1269
- "model.layers.32.post_attention_layernorm": false,
1270
  "model.layers.32.mlp.gate": {
1271
  "group_size": 64,
1272
  "bits": 4
@@ -1283,7 +1087,6 @@
1283
  "group_size": 64,
1284
  "bits": 4
1285
  },
1286
- "model.layers.32.mlp.switch_mlp.activation": false,
1287
  "model.layers.33.self_attn.q_proj": {
1288
  "group_size": 64,
1289
  "bits": 4
@@ -1300,11 +1103,6 @@
1300
  "group_size": 64,
1301
  "bits": 4
1302
  },
1303
- "model.layers.33.self_attn.q_norm": false,
1304
- "model.layers.33.self_attn.k_norm": false,
1305
- "model.layers.33.self_attn.rope": false,
1306
- "model.layers.33.input_layernorm": false,
1307
- "model.layers.33.post_attention_layernorm": false,
1308
  "model.layers.33.mlp.gate": {
1309
  "group_size": 64,
1310
  "bits": 4
@@ -1321,7 +1119,6 @@
1321
  "group_size": 64,
1322
  "bits": 6
1323
  },
1324
- "model.layers.33.mlp.switch_mlp.activation": false,
1325
  "model.layers.34.self_attn.q_proj": {
1326
  "group_size": 64,
1327
  "bits": 4
@@ -1338,11 +1135,6 @@
1338
  "group_size": 64,
1339
  "bits": 4
1340
  },
1341
- "model.layers.34.self_attn.q_norm": false,
1342
- "model.layers.34.self_attn.k_norm": false,
1343
- "model.layers.34.self_attn.rope": false,
1344
- "model.layers.34.input_layernorm": false,
1345
- "model.layers.34.post_attention_layernorm": false,
1346
  "model.layers.34.mlp.gate": {
1347
  "group_size": 64,
1348
  "bits": 4
@@ -1359,7 +1151,6 @@
1359
  "group_size": 64,
1360
  "bits": 4
1361
  },
1362
- "model.layers.34.mlp.switch_mlp.activation": false,
1363
  "model.layers.35.self_attn.q_proj": {
1364
  "group_size": 64,
1365
  "bits": 4
@@ -1376,11 +1167,6 @@
1376
  "group_size": 64,
1377
  "bits": 4
1378
  },
1379
- "model.layers.35.self_attn.q_norm": false,
1380
- "model.layers.35.self_attn.k_norm": false,
1381
- "model.layers.35.self_attn.rope": false,
1382
- "model.layers.35.input_layernorm": false,
1383
- "model.layers.35.post_attention_layernorm": false,
1384
  "model.layers.35.mlp.gate": {
1385
  "group_size": 64,
1386
  "bits": 4
@@ -1397,7 +1183,6 @@
1397
  "group_size": 64,
1398
  "bits": 4
1399
  },
1400
- "model.layers.35.mlp.switch_mlp.activation": false,
1401
  "model.layers.36.self_attn.q_proj": {
1402
  "group_size": 64,
1403
  "bits": 4
@@ -1414,11 +1199,6 @@
1414
  "group_size": 64,
1415
  "bits": 4
1416
  },
1417
- "model.layers.36.self_attn.q_norm": false,
1418
- "model.layers.36.self_attn.k_norm": false,
1419
- "model.layers.36.self_attn.rope": false,
1420
- "model.layers.36.input_layernorm": false,
1421
- "model.layers.36.post_attention_layernorm": false,
1422
  "model.layers.36.mlp.gate": {
1423
  "group_size": 64,
1424
  "bits": 4
@@ -1435,7 +1215,6 @@
1435
  "group_size": 64,
1436
  "bits": 6
1437
  },
1438
- "model.layers.36.mlp.switch_mlp.activation": false,
1439
  "model.layers.37.self_attn.q_proj": {
1440
  "group_size": 64,
1441
  "bits": 4
@@ -1452,11 +1231,6 @@
1452
  "group_size": 64,
1453
  "bits": 4
1454
  },
1455
- "model.layers.37.self_attn.q_norm": false,
1456
- "model.layers.37.self_attn.k_norm": false,
1457
- "model.layers.37.self_attn.rope": false,
1458
- "model.layers.37.input_layernorm": false,
1459
- "model.layers.37.post_attention_layernorm": false,
1460
  "model.layers.37.mlp.gate": {
1461
  "group_size": 64,
1462
  "bits": 4
@@ -1473,7 +1247,6 @@
1473
  "group_size": 64,
1474
  "bits": 4
1475
  },
1476
- "model.layers.37.mlp.switch_mlp.activation": false,
1477
  "model.layers.38.self_attn.q_proj": {
1478
  "group_size": 64,
1479
  "bits": 4
@@ -1490,11 +1263,6 @@
1490
  "group_size": 64,
1491
  "bits": 4
1492
  },
1493
- "model.layers.38.self_attn.q_norm": false,
1494
- "model.layers.38.self_attn.k_norm": false,
1495
- "model.layers.38.self_attn.rope": false,
1496
- "model.layers.38.input_layernorm": false,
1497
- "model.layers.38.post_attention_layernorm": false,
1498
  "model.layers.38.mlp.gate": {
1499
  "group_size": 64,
1500
  "bits": 4
@@ -1511,7 +1279,6 @@
1511
  "group_size": 64,
1512
  "bits": 4
1513
  },
1514
- "model.layers.38.mlp.switch_mlp.activation": false,
1515
  "model.layers.39.self_attn.q_proj": {
1516
  "group_size": 64,
1517
  "bits": 4
@@ -1528,11 +1295,6 @@
1528
  "group_size": 64,
1529
  "bits": 4
1530
  },
1531
- "model.layers.39.self_attn.q_norm": false,
1532
- "model.layers.39.self_attn.k_norm": false,
1533
- "model.layers.39.self_attn.rope": false,
1534
- "model.layers.39.input_layernorm": false,
1535
- "model.layers.39.post_attention_layernorm": false,
1536
  "model.layers.39.mlp.gate": {
1537
  "group_size": 64,
1538
  "bits": 4
@@ -1549,7 +1311,6 @@
1549
  "group_size": 64,
1550
  "bits": 6
1551
  },
1552
- "model.layers.39.mlp.switch_mlp.activation": false,
1553
  "model.layers.40.self_attn.q_proj": {
1554
  "group_size": 64,
1555
  "bits": 4
@@ -1566,11 +1327,6 @@
1566
  "group_size": 64,
1567
  "bits": 4
1568
  },
1569
- "model.layers.40.self_attn.q_norm": false,
1570
- "model.layers.40.self_attn.k_norm": false,
1571
- "model.layers.40.self_attn.rope": false,
1572
- "model.layers.40.input_layernorm": false,
1573
- "model.layers.40.post_attention_layernorm": false,
1574
  "model.layers.40.mlp.gate": {
1575
  "group_size": 64,
1576
  "bits": 4
@@ -1587,7 +1343,6 @@
1587
  "group_size": 64,
1588
  "bits": 4
1589
  },
1590
- "model.layers.40.mlp.switch_mlp.activation": false,
1591
  "model.layers.41.self_attn.q_proj": {
1592
  "group_size": 64,
1593
  "bits": 4
@@ -1604,11 +1359,6 @@
1604
  "group_size": 64,
1605
  "bits": 4
1606
  },
1607
- "model.layers.41.self_attn.q_norm": false,
1608
- "model.layers.41.self_attn.k_norm": false,
1609
- "model.layers.41.self_attn.rope": false,
1610
- "model.layers.41.input_layernorm": false,
1611
- "model.layers.41.post_attention_layernorm": false,
1612
  "model.layers.41.mlp.gate": {
1613
  "group_size": 64,
1614
  "bits": 4
@@ -1625,7 +1375,6 @@
1625
  "group_size": 64,
1626
  "bits": 4
1627
  },
1628
- "model.layers.41.mlp.switch_mlp.activation": false,
1629
  "model.layers.42.self_attn.q_proj": {
1630
  "group_size": 64,
1631
  "bits": 4
@@ -1642,11 +1391,6 @@
1642
  "group_size": 64,
1643
  "bits": 4
1644
  },
1645
- "model.layers.42.self_attn.q_norm": false,
1646
- "model.layers.42.self_attn.k_norm": false,
1647
- "model.layers.42.self_attn.rope": false,
1648
- "model.layers.42.input_layernorm": false,
1649
- "model.layers.42.post_attention_layernorm": false,
1650
  "model.layers.42.mlp.gate": {
1651
  "group_size": 64,
1652
  "bits": 4
@@ -1663,7 +1407,6 @@
1663
  "group_size": 64,
1664
  "bits": 6
1665
  },
1666
- "model.layers.42.mlp.switch_mlp.activation": false,
1667
  "model.layers.43.self_attn.q_proj": {
1668
  "group_size": 64,
1669
  "bits": 4
@@ -1680,11 +1423,6 @@
1680
  "group_size": 64,
1681
  "bits": 4
1682
  },
1683
- "model.layers.43.self_attn.q_norm": false,
1684
- "model.layers.43.self_attn.k_norm": false,
1685
- "model.layers.43.self_attn.rope": false,
1686
- "model.layers.43.input_layernorm": false,
1687
- "model.layers.43.post_attention_layernorm": false,
1688
  "model.layers.43.mlp.gate": {
1689
  "group_size": 64,
1690
  "bits": 4
@@ -1701,7 +1439,6 @@
1701
  "group_size": 64,
1702
  "bits": 4
1703
  },
1704
- "model.layers.43.mlp.switch_mlp.activation": false,
1705
  "model.layers.44.self_attn.q_proj": {
1706
  "group_size": 64,
1707
  "bits": 4
@@ -1718,11 +1455,6 @@
1718
  "group_size": 64,
1719
  "bits": 4
1720
  },
1721
- "model.layers.44.self_attn.q_norm": false,
1722
- "model.layers.44.self_attn.k_norm": false,
1723
- "model.layers.44.self_attn.rope": false,
1724
- "model.layers.44.input_layernorm": false,
1725
- "model.layers.44.post_attention_layernorm": false,
1726
  "model.layers.44.mlp.gate": {
1727
  "group_size": 64,
1728
  "bits": 4
@@ -1739,7 +1471,6 @@
1739
  "group_size": 64,
1740
  "bits": 4
1741
  },
1742
- "model.layers.44.mlp.switch_mlp.activation": false,
1743
  "model.layers.45.self_attn.q_proj": {
1744
  "group_size": 64,
1745
  "bits": 4
@@ -1756,11 +1487,6 @@
1756
  "group_size": 64,
1757
  "bits": 4
1758
  },
1759
- "model.layers.45.self_attn.q_norm": false,
1760
- "model.layers.45.self_attn.k_norm": false,
1761
- "model.layers.45.self_attn.rope": false,
1762
- "model.layers.45.input_layernorm": false,
1763
- "model.layers.45.post_attention_layernorm": false,
1764
  "model.layers.45.mlp.gate": {
1765
  "group_size": 64,
1766
  "bits": 4
@@ -1777,7 +1503,6 @@
1777
  "group_size": 64,
1778
  "bits": 6
1779
  },
1780
- "model.layers.45.mlp.switch_mlp.activation": false,
1781
  "model.layers.46.self_attn.q_proj": {
1782
  "group_size": 64,
1783
  "bits": 4
@@ -1794,11 +1519,6 @@
1794
  "group_size": 64,
1795
  "bits": 4
1796
  },
1797
- "model.layers.46.self_attn.q_norm": false,
1798
- "model.layers.46.self_attn.k_norm": false,
1799
- "model.layers.46.self_attn.rope": false,
1800
- "model.layers.46.input_layernorm": false,
1801
- "model.layers.46.post_attention_layernorm": false,
1802
  "model.layers.46.mlp.gate": {
1803
  "group_size": 64,
1804
  "bits": 4
@@ -1815,7 +1535,6 @@
1815
  "group_size": 64,
1816
  "bits": 4
1817
  },
1818
- "model.layers.46.mlp.switch_mlp.activation": false,
1819
  "model.layers.47.self_attn.q_proj": {
1820
  "group_size": 64,
1821
  "bits": 4
@@ -1832,11 +1551,6 @@
1832
  "group_size": 64,
1833
  "bits": 4
1834
  },
1835
- "model.layers.47.self_attn.q_norm": false,
1836
- "model.layers.47.self_attn.k_norm": false,
1837
- "model.layers.47.self_attn.rope": false,
1838
- "model.layers.47.input_layernorm": false,
1839
- "model.layers.47.post_attention_layernorm": false,
1840
  "model.layers.47.mlp.gate": {
1841
  "group_size": 64,
1842
  "bits": 4
@@ -1853,7 +1567,6 @@
1853
  "group_size": 64,
1854
  "bits": 4
1855
  },
1856
- "model.layers.47.mlp.switch_mlp.activation": false,
1857
  "model.layers.48.self_attn.q_proj": {
1858
  "group_size": 64,
1859
  "bits": 4
@@ -1870,11 +1583,6 @@
1870
  "group_size": 64,
1871
  "bits": 4
1872
  },
1873
- "model.layers.48.self_attn.q_norm": false,
1874
- "model.layers.48.self_attn.k_norm": false,
1875
- "model.layers.48.self_attn.rope": false,
1876
- "model.layers.48.input_layernorm": false,
1877
- "model.layers.48.post_attention_layernorm": false,
1878
  "model.layers.48.mlp.gate": {
1879
  "group_size": 64,
1880
  "bits": 4
@@ -1891,7 +1599,6 @@
1891
  "group_size": 64,
1892
  "bits": 6
1893
  },
1894
- "model.layers.48.mlp.switch_mlp.activation": false,
1895
  "model.layers.49.self_attn.q_proj": {
1896
  "group_size": 64,
1897
  "bits": 4
@@ -1908,11 +1615,6 @@
1908
  "group_size": 64,
1909
  "bits": 4
1910
  },
1911
- "model.layers.49.self_attn.q_norm": false,
1912
- "model.layers.49.self_attn.k_norm": false,
1913
- "model.layers.49.self_attn.rope": false,
1914
- "model.layers.49.input_layernorm": false,
1915
- "model.layers.49.post_attention_layernorm": false,
1916
  "model.layers.49.mlp.gate": {
1917
  "group_size": 64,
1918
  "bits": 4
@@ -1929,7 +1631,6 @@
1929
  "group_size": 64,
1930
  "bits": 4
1931
  },
1932
- "model.layers.49.mlp.switch_mlp.activation": false,
1933
  "model.layers.50.self_attn.q_proj": {
1934
  "group_size": 64,
1935
  "bits": 4
@@ -1946,11 +1647,6 @@
1946
  "group_size": 64,
1947
  "bits": 4
1948
  },
1949
- "model.layers.50.self_attn.q_norm": false,
1950
- "model.layers.50.self_attn.k_norm": false,
1951
- "model.layers.50.self_attn.rope": false,
1952
- "model.layers.50.input_layernorm": false,
1953
- "model.layers.50.post_attention_layernorm": false,
1954
  "model.layers.50.mlp.gate": {
1955
  "group_size": 64,
1956
  "bits": 4
@@ -1967,7 +1663,6 @@
1967
  "group_size": 64,
1968
  "bits": 4
1969
  },
1970
- "model.layers.50.mlp.switch_mlp.activation": false,
1971
  "model.layers.51.self_attn.q_proj": {
1972
  "group_size": 64,
1973
  "bits": 4
@@ -1984,11 +1679,6 @@
1984
  "group_size": 64,
1985
  "bits": 4
1986
  },
1987
- "model.layers.51.self_attn.q_norm": false,
1988
- "model.layers.51.self_attn.k_norm": false,
1989
- "model.layers.51.self_attn.rope": false,
1990
- "model.layers.51.input_layernorm": false,
1991
- "model.layers.51.post_attention_layernorm": false,
1992
  "model.layers.51.mlp.gate": {
1993
  "group_size": 64,
1994
  "bits": 4
@@ -2005,7 +1695,6 @@
2005
  "group_size": 64,
2006
  "bits": 6
2007
  },
2008
- "model.layers.51.mlp.switch_mlp.activation": false,
2009
  "model.layers.52.self_attn.q_proj": {
2010
  "group_size": 64,
2011
  "bits": 4
@@ -2022,11 +1711,6 @@
2022
  "group_size": 64,
2023
  "bits": 4
2024
  },
2025
- "model.layers.52.self_attn.q_norm": false,
2026
- "model.layers.52.self_attn.k_norm": false,
2027
- "model.layers.52.self_attn.rope": false,
2028
- "model.layers.52.input_layernorm": false,
2029
- "model.layers.52.post_attention_layernorm": false,
2030
  "model.layers.52.mlp.gate": {
2031
  "group_size": 64,
2032
  "bits": 4
@@ -2043,7 +1727,6 @@
2043
  "group_size": 64,
2044
  "bits": 4
2045
  },
2046
- "model.layers.52.mlp.switch_mlp.activation": false,
2047
  "model.layers.53.self_attn.q_proj": {
2048
  "group_size": 64,
2049
  "bits": 4
@@ -2060,11 +1743,6 @@
2060
  "group_size": 64,
2061
  "bits": 4
2062
  },
2063
- "model.layers.53.self_attn.q_norm": false,
2064
- "model.layers.53.self_attn.k_norm": false,
2065
- "model.layers.53.self_attn.rope": false,
2066
- "model.layers.53.input_layernorm": false,
2067
- "model.layers.53.post_attention_layernorm": false,
2068
  "model.layers.53.mlp.gate": {
2069
  "group_size": 64,
2070
  "bits": 4
@@ -2081,7 +1759,6 @@
2081
  "group_size": 64,
2082
  "bits": 4
2083
  },
2084
- "model.layers.53.mlp.switch_mlp.activation": false,
2085
  "model.layers.54.self_attn.q_proj": {
2086
  "group_size": 64,
2087
  "bits": 4
@@ -2098,11 +1775,6 @@
2098
  "group_size": 64,
2099
  "bits": 4
2100
  },
2101
- "model.layers.54.self_attn.q_norm": false,
2102
- "model.layers.54.self_attn.k_norm": false,
2103
- "model.layers.54.self_attn.rope": false,
2104
- "model.layers.54.input_layernorm": false,
2105
- "model.layers.54.post_attention_layernorm": false,
2106
  "model.layers.54.mlp.gate": {
2107
  "group_size": 64,
2108
  "bits": 4
@@ -2119,7 +1791,6 @@
2119
  "group_size": 64,
2120
  "bits": 6
2121
  },
2122
- "model.layers.54.mlp.switch_mlp.activation": false,
2123
  "model.layers.55.self_attn.q_proj": {
2124
  "group_size": 64,
2125
  "bits": 4
@@ -2136,11 +1807,6 @@
2136
  "group_size": 64,
2137
  "bits": 4
2138
  },
2139
- "model.layers.55.self_attn.q_norm": false,
2140
- "model.layers.55.self_attn.k_norm": false,
2141
- "model.layers.55.self_attn.rope": false,
2142
- "model.layers.55.input_layernorm": false,
2143
- "model.layers.55.post_attention_layernorm": false,
2144
  "model.layers.55.mlp.gate": {
2145
  "group_size": 64,
2146
  "bits": 4
@@ -2157,7 +1823,6 @@
2157
  "group_size": 64,
2158
  "bits": 6
2159
  },
2160
- "model.layers.55.mlp.switch_mlp.activation": false,
2161
  "model.layers.56.self_attn.q_proj": {
2162
  "group_size": 64,
2163
  "bits": 4
@@ -2174,11 +1839,6 @@
2174
  "group_size": 64,
2175
  "bits": 4
2176
  },
2177
- "model.layers.56.self_attn.q_norm": false,
2178
- "model.layers.56.self_attn.k_norm": false,
2179
- "model.layers.56.self_attn.rope": false,
2180
- "model.layers.56.input_layernorm": false,
2181
- "model.layers.56.post_attention_layernorm": false,
2182
  "model.layers.56.mlp.gate": {
2183
  "group_size": 64,
2184
  "bits": 4
@@ -2195,7 +1855,6 @@
2195
  "group_size": 64,
2196
  "bits": 6
2197
  },
2198
- "model.layers.56.mlp.switch_mlp.activation": false,
2199
  "model.layers.57.self_attn.q_proj": {
2200
  "group_size": 64,
2201
  "bits": 4
@@ -2212,11 +1871,6 @@
2212
  "group_size": 64,
2213
  "bits": 4
2214
  },
2215
- "model.layers.57.self_attn.q_norm": false,
2216
- "model.layers.57.self_attn.k_norm": false,
2217
- "model.layers.57.self_attn.rope": false,
2218
- "model.layers.57.input_layernorm": false,
2219
- "model.layers.57.post_attention_layernorm": false,
2220
  "model.layers.57.mlp.gate": {
2221
  "group_size": 64,
2222
  "bits": 4
@@ -2233,7 +1887,6 @@
2233
  "group_size": 64,
2234
  "bits": 6
2235
  },
2236
- "model.layers.57.mlp.switch_mlp.activation": false,
2237
  "model.layers.58.self_attn.q_proj": {
2238
  "group_size": 64,
2239
  "bits": 4
@@ -2250,11 +1903,6 @@
2250
  "group_size": 64,
2251
  "bits": 4
2252
  },
2253
- "model.layers.58.self_attn.q_norm": false,
2254
- "model.layers.58.self_attn.k_norm": false,
2255
- "model.layers.58.self_attn.rope": false,
2256
- "model.layers.58.input_layernorm": false,
2257
- "model.layers.58.post_attention_layernorm": false,
2258
  "model.layers.58.mlp.gate": {
2259
  "group_size": 64,
2260
  "bits": 4
@@ -2271,7 +1919,6 @@
2271
  "group_size": 64,
2272
  "bits": 6
2273
  },
2274
- "model.layers.58.mlp.switch_mlp.activation": false,
2275
  "model.layers.59.self_attn.q_proj": {
2276
  "group_size": 64,
2277
  "bits": 4
@@ -2288,11 +1935,6 @@
2288
  "group_size": 64,
2289
  "bits": 4
2290
  },
2291
- "model.layers.59.self_attn.q_norm": false,
2292
- "model.layers.59.self_attn.k_norm": false,
2293
- "model.layers.59.self_attn.rope": false,
2294
- "model.layers.59.input_layernorm": false,
2295
- "model.layers.59.post_attention_layernorm": false,
2296
  "model.layers.59.mlp.gate": {
2297
  "group_size": 64,
2298
  "bits": 4
@@ -2309,7 +1951,6 @@
2309
  "group_size": 64,
2310
  "bits": 6
2311
  },
2312
- "model.layers.59.mlp.switch_mlp.activation": false,
2313
  "model.layers.60.self_attn.q_proj": {
2314
  "group_size": 64,
2315
  "bits": 4
@@ -2326,11 +1967,6 @@
2326
  "group_size": 64,
2327
  "bits": 4
2328
  },
2329
- "model.layers.60.self_attn.q_norm": false,
2330
- "model.layers.60.self_attn.k_norm": false,
2331
- "model.layers.60.self_attn.rope": false,
2332
- "model.layers.60.input_layernorm": false,
2333
- "model.layers.60.post_attention_layernorm": false,
2334
  "model.layers.60.mlp.gate": {
2335
  "group_size": 64,
2336
  "bits": 4
@@ -2347,7 +1983,6 @@
2347
  "group_size": 64,
2348
  "bits": 6
2349
  },
2350
- "model.layers.60.mlp.switch_mlp.activation": false,
2351
  "model.layers.61.self_attn.q_proj": {
2352
  "group_size": 64,
2353
  "bits": 4
@@ -2364,11 +1999,6 @@
2364
  "group_size": 64,
2365
  "bits": 4
2366
  },
2367
- "model.layers.61.self_attn.q_norm": false,
2368
- "model.layers.61.self_attn.k_norm": false,
2369
- "model.layers.61.self_attn.rope": false,
2370
- "model.layers.61.input_layernorm": false,
2371
- "model.layers.61.post_attention_layernorm": false,
2372
  "model.layers.61.mlp.gate": {
2373
  "group_size": 64,
2374
  "bits": 4
@@ -2385,8 +2015,6 @@
2385
  "group_size": 64,
2386
  "bits": 6
2387
  },
2388
- "model.layers.61.mlp.switch_mlp.activation": false,
2389
- "model.norm": false,
2390
  "lm_head": {
2391
  "group_size": 64,
2392
  "bits": 6
@@ -2395,6 +2023,7 @@
2395
  "quantization_config": {
2396
  "group_size": 64,
2397
  "bits": 4,
 
2398
  "model.embed_tokens": {
2399
  "group_size": 64,
2400
  "bits": 4
@@ -2415,11 +2044,6 @@
2415
  "group_size": 64,
2416
  "bits": 4
2417
  },
2418
- "model.layers.0.self_attn.q_norm": false,
2419
- "model.layers.0.self_attn.k_norm": false,
2420
- "model.layers.0.self_attn.rope": false,
2421
- "model.layers.0.input_layernorm": false,
2422
- "model.layers.0.post_attention_layernorm": false,
2423
  "model.layers.0.mlp.gate": {
2424
  "group_size": 64,
2425
  "bits": 4
@@ -2436,7 +2060,6 @@
2436
  "group_size": 64,
2437
  "bits": 6
2438
  },
2439
- "model.layers.0.mlp.switch_mlp.activation": false,
2440
  "model.layers.1.self_attn.q_proj": {
2441
  "group_size": 64,
2442
  "bits": 4
@@ -2453,11 +2076,6 @@
2453
  "group_size": 64,
2454
  "bits": 4
2455
  },
2456
- "model.layers.1.self_attn.q_norm": false,
2457
- "model.layers.1.self_attn.k_norm": false,
2458
- "model.layers.1.self_attn.rope": false,
2459
- "model.layers.1.input_layernorm": false,
2460
- "model.layers.1.post_attention_layernorm": false,
2461
  "model.layers.1.mlp.gate": {
2462
  "group_size": 64,
2463
  "bits": 4
@@ -2474,7 +2092,6 @@
2474
  "group_size": 64,
2475
  "bits": 6
2476
  },
2477
- "model.layers.1.mlp.switch_mlp.activation": false,
2478
  "model.layers.2.self_attn.q_proj": {
2479
  "group_size": 64,
2480
  "bits": 4
@@ -2491,11 +2108,6 @@
2491
  "group_size": 64,
2492
  "bits": 4
2493
  },
2494
- "model.layers.2.self_attn.q_norm": false,
2495
- "model.layers.2.self_attn.k_norm": false,
2496
- "model.layers.2.self_attn.rope": false,
2497
- "model.layers.2.input_layernorm": false,
2498
- "model.layers.2.post_attention_layernorm": false,
2499
  "model.layers.2.mlp.gate": {
2500
  "group_size": 64,
2501
  "bits": 4
@@ -2512,7 +2124,6 @@
2512
  "group_size": 64,
2513
  "bits": 6
2514
  },
2515
- "model.layers.2.mlp.switch_mlp.activation": false,
2516
  "model.layers.3.self_attn.q_proj": {
2517
  "group_size": 64,
2518
  "bits": 4
@@ -2529,11 +2140,6 @@
2529
  "group_size": 64,
2530
  "bits": 4
2531
  },
2532
- "model.layers.3.self_attn.q_norm": false,
2533
- "model.layers.3.self_attn.k_norm": false,
2534
- "model.layers.3.self_attn.rope": false,
2535
- "model.layers.3.input_layernorm": false,
2536
- "model.layers.3.post_attention_layernorm": false,
2537
  "model.layers.3.mlp.gate": {
2538
  "group_size": 64,
2539
  "bits": 4
@@ -2550,7 +2156,6 @@
2550
  "group_size": 64,
2551
  "bits": 6
2552
  },
2553
- "model.layers.3.mlp.switch_mlp.activation": false,
2554
  "model.layers.4.self_attn.q_proj": {
2555
  "group_size": 64,
2556
  "bits": 4
@@ -2567,11 +2172,6 @@
2567
  "group_size": 64,
2568
  "bits": 4
2569
  },
2570
- "model.layers.4.self_attn.q_norm": false,
2571
- "model.layers.4.self_attn.k_norm": false,
2572
- "model.layers.4.self_attn.rope": false,
2573
- "model.layers.4.input_layernorm": false,
2574
- "model.layers.4.post_attention_layernorm": false,
2575
  "model.layers.4.mlp.gate": {
2576
  "group_size": 64,
2577
  "bits": 4
@@ -2588,7 +2188,6 @@
2588
  "group_size": 64,
2589
  "bits": 6
2590
  },
2591
- "model.layers.4.mlp.switch_mlp.activation": false,
2592
  "model.layers.5.self_attn.q_proj": {
2593
  "group_size": 64,
2594
  "bits": 4
@@ -2605,11 +2204,6 @@
2605
  "group_size": 64,
2606
  "bits": 4
2607
  },
2608
- "model.layers.5.self_attn.q_norm": false,
2609
- "model.layers.5.self_attn.k_norm": false,
2610
- "model.layers.5.self_attn.rope": false,
2611
- "model.layers.5.input_layernorm": false,
2612
- "model.layers.5.post_attention_layernorm": false,
2613
  "model.layers.5.mlp.gate": {
2614
  "group_size": 64,
2615
  "bits": 4
@@ -2626,7 +2220,6 @@
2626
  "group_size": 64,
2627
  "bits": 6
2628
  },
2629
- "model.layers.5.mlp.switch_mlp.activation": false,
2630
  "model.layers.6.self_attn.q_proj": {
2631
  "group_size": 64,
2632
  "bits": 4
@@ -2643,11 +2236,6 @@
2643
  "group_size": 64,
2644
  "bits": 4
2645
  },
2646
- "model.layers.6.self_attn.q_norm": false,
2647
- "model.layers.6.self_attn.k_norm": false,
2648
- "model.layers.6.self_attn.rope": false,
2649
- "model.layers.6.input_layernorm": false,
2650
- "model.layers.6.post_attention_layernorm": false,
2651
  "model.layers.6.mlp.gate": {
2652
  "group_size": 64,
2653
  "bits": 4
@@ -2664,7 +2252,6 @@
2664
  "group_size": 64,
2665
  "bits": 6
2666
  },
2667
- "model.layers.6.mlp.switch_mlp.activation": false,
2668
  "model.layers.7.self_attn.q_proj": {
2669
  "group_size": 64,
2670
  "bits": 4
@@ -2681,11 +2268,6 @@
2681
  "group_size": 64,
2682
  "bits": 4
2683
  },
2684
- "model.layers.7.self_attn.q_norm": false,
2685
- "model.layers.7.self_attn.k_norm": false,
2686
- "model.layers.7.self_attn.rope": false,
2687
- "model.layers.7.input_layernorm": false,
2688
- "model.layers.7.post_attention_layernorm": false,
2689
  "model.layers.7.mlp.gate": {
2690
  "group_size": 64,
2691
  "bits": 4
@@ -2702,7 +2284,6 @@
2702
  "group_size": 64,
2703
  "bits": 4
2704
  },
2705
- "model.layers.7.mlp.switch_mlp.activation": false,
2706
  "model.layers.8.self_attn.q_proj": {
2707
  "group_size": 64,
2708
  "bits": 4
@@ -2719,11 +2300,6 @@
2719
  "group_size": 64,
2720
  "bits": 4
2721
  },
2722
- "model.layers.8.self_attn.q_norm": false,
2723
- "model.layers.8.self_attn.k_norm": false,
2724
- "model.layers.8.self_attn.rope": false,
2725
- "model.layers.8.input_layernorm": false,
2726
- "model.layers.8.post_attention_layernorm": false,
2727
  "model.layers.8.mlp.gate": {
2728
  "group_size": 64,
2729
  "bits": 4
@@ -2740,7 +2316,6 @@
2740
  "group_size": 64,
2741
  "bits": 4
2742
  },
2743
- "model.layers.8.mlp.switch_mlp.activation": false,
2744
  "model.layers.9.self_attn.q_proj": {
2745
  "group_size": 64,
2746
  "bits": 4
@@ -2757,11 +2332,6 @@
2757
  "group_size": 64,
2758
  "bits": 4
2759
  },
2760
- "model.layers.9.self_attn.q_norm": false,
2761
- "model.layers.9.self_attn.k_norm": false,
2762
- "model.layers.9.self_attn.rope": false,
2763
- "model.layers.9.input_layernorm": false,
2764
- "model.layers.9.post_attention_layernorm": false,
2765
  "model.layers.9.mlp.gate": {
2766
  "group_size": 64,
2767
  "bits": 4
@@ -2778,7 +2348,6 @@
2778
  "group_size": 64,
2779
  "bits": 6
2780
  },
2781
- "model.layers.9.mlp.switch_mlp.activation": false,
2782
  "model.layers.10.self_attn.q_proj": {
2783
  "group_size": 64,
2784
  "bits": 4
@@ -2795,11 +2364,6 @@
2795
  "group_size": 64,
2796
  "bits": 4
2797
  },
2798
- "model.layers.10.self_attn.q_norm": false,
2799
- "model.layers.10.self_attn.k_norm": false,
2800
- "model.layers.10.self_attn.rope": false,
2801
- "model.layers.10.input_layernorm": false,
2802
- "model.layers.10.post_attention_layernorm": false,
2803
  "model.layers.10.mlp.gate": {
2804
  "group_size": 64,
2805
  "bits": 4
@@ -2816,7 +2380,6 @@
2816
  "group_size": 64,
2817
  "bits": 4
2818
  },
2819
- "model.layers.10.mlp.switch_mlp.activation": false,
2820
  "model.layers.11.self_attn.q_proj": {
2821
  "group_size": 64,
2822
  "bits": 4
@@ -2833,11 +2396,6 @@
2833
  "group_size": 64,
2834
  "bits": 4
2835
  },
2836
- "model.layers.11.self_attn.q_norm": false,
2837
- "model.layers.11.self_attn.k_norm": false,
2838
- "model.layers.11.self_attn.rope": false,
2839
- "model.layers.11.input_layernorm": false,
2840
- "model.layers.11.post_attention_layernorm": false,
2841
  "model.layers.11.mlp.gate": {
2842
  "group_size": 64,
2843
  "bits": 4
@@ -2854,7 +2412,6 @@
2854
  "group_size": 64,
2855
  "bits": 4
2856
  },
2857
- "model.layers.11.mlp.switch_mlp.activation": false,
2858
  "model.layers.12.self_attn.q_proj": {
2859
  "group_size": 64,
2860
  "bits": 4
@@ -2871,11 +2428,6 @@
2871
  "group_size": 64,
2872
  "bits": 4
2873
  },
2874
- "model.layers.12.self_attn.q_norm": false,
2875
- "model.layers.12.self_attn.k_norm": false,
2876
- "model.layers.12.self_attn.rope": false,
2877
- "model.layers.12.input_layernorm": false,
2878
- "model.layers.12.post_attention_layernorm": false,
2879
  "model.layers.12.mlp.gate": {
2880
  "group_size": 64,
2881
  "bits": 4
@@ -2892,7 +2444,6 @@
2892
  "group_size": 64,
2893
  "bits": 6
2894
  },
2895
- "model.layers.12.mlp.switch_mlp.activation": false,
2896
  "model.layers.13.self_attn.q_proj": {
2897
  "group_size": 64,
2898
  "bits": 4
@@ -2909,11 +2460,6 @@
2909
  "group_size": 64,
2910
  "bits": 4
2911
  },
2912
- "model.layers.13.self_attn.q_norm": false,
2913
- "model.layers.13.self_attn.k_norm": false,
2914
- "model.layers.13.self_attn.rope": false,
2915
- "model.layers.13.input_layernorm": false,
2916
- "model.layers.13.post_attention_layernorm": false,
2917
  "model.layers.13.mlp.gate": {
2918
  "group_size": 64,
2919
  "bits": 4
@@ -2930,7 +2476,6 @@
2930
  "group_size": 64,
2931
  "bits": 4
2932
  },
2933
- "model.layers.13.mlp.switch_mlp.activation": false,
2934
  "model.layers.14.self_attn.q_proj": {
2935
  "group_size": 64,
2936
  "bits": 4
@@ -2947,11 +2492,6 @@
2947
  "group_size": 64,
2948
  "bits": 4
2949
  },
2950
- "model.layers.14.self_attn.q_norm": false,
2951
- "model.layers.14.self_attn.k_norm": false,
2952
- "model.layers.14.self_attn.rope": false,
2953
- "model.layers.14.input_layernorm": false,
2954
- "model.layers.14.post_attention_layernorm": false,
2955
  "model.layers.14.mlp.gate": {
2956
  "group_size": 64,
2957
  "bits": 4
@@ -2968,7 +2508,6 @@
2968
  "group_size": 64,
2969
  "bits": 4
2970
  },
2971
- "model.layers.14.mlp.switch_mlp.activation": false,
2972
  "model.layers.15.self_attn.q_proj": {
2973
  "group_size": 64,
2974
  "bits": 4
@@ -2985,11 +2524,6 @@
2985
  "group_size": 64,
2986
  "bits": 4
2987
  },
2988
- "model.layers.15.self_attn.q_norm": false,
2989
- "model.layers.15.self_attn.k_norm": false,
2990
- "model.layers.15.self_attn.rope": false,
2991
- "model.layers.15.input_layernorm": false,
2992
- "model.layers.15.post_attention_layernorm": false,
2993
  "model.layers.15.mlp.gate": {
2994
  "group_size": 64,
2995
  "bits": 4
@@ -3006,7 +2540,6 @@
3006
  "group_size": 64,
3007
  "bits": 6
3008
  },
3009
- "model.layers.15.mlp.switch_mlp.activation": false,
3010
  "model.layers.16.self_attn.q_proj": {
3011
  "group_size": 64,
3012
  "bits": 4
@@ -3023,11 +2556,6 @@
3023
  "group_size": 64,
3024
  "bits": 4
3025
  },
3026
- "model.layers.16.self_attn.q_norm": false,
3027
- "model.layers.16.self_attn.k_norm": false,
3028
- "model.layers.16.self_attn.rope": false,
3029
- "model.layers.16.input_layernorm": false,
3030
- "model.layers.16.post_attention_layernorm": false,
3031
  "model.layers.16.mlp.gate": {
3032
  "group_size": 64,
3033
  "bits": 4
@@ -3044,7 +2572,6 @@
3044
  "group_size": 64,
3045
  "bits": 4
3046
  },
3047
- "model.layers.16.mlp.switch_mlp.activation": false,
3048
  "model.layers.17.self_attn.q_proj": {
3049
  "group_size": 64,
3050
  "bits": 4
@@ -3061,11 +2588,6 @@
3061
  "group_size": 64,
3062
  "bits": 4
3063
  },
3064
- "model.layers.17.self_attn.q_norm": false,
3065
- "model.layers.17.self_attn.k_norm": false,
3066
- "model.layers.17.self_attn.rope": false,
3067
- "model.layers.17.input_layernorm": false,
3068
- "model.layers.17.post_attention_layernorm": false,
3069
  "model.layers.17.mlp.gate": {
3070
  "group_size": 64,
3071
  "bits": 4
@@ -3082,7 +2604,6 @@
3082
  "group_size": 64,
3083
  "bits": 4
3084
  },
3085
- "model.layers.17.mlp.switch_mlp.activation": false,
3086
  "model.layers.18.self_attn.q_proj": {
3087
  "group_size": 64,
3088
  "bits": 4
@@ -3099,11 +2620,6 @@
3099
  "group_size": 64,
3100
  "bits": 4
3101
  },
3102
- "model.layers.18.self_attn.q_norm": false,
3103
- "model.layers.18.self_attn.k_norm": false,
3104
- "model.layers.18.self_attn.rope": false,
3105
- "model.layers.18.input_layernorm": false,
3106
- "model.layers.18.post_attention_layernorm": false,
3107
  "model.layers.18.mlp.gate": {
3108
  "group_size": 64,
3109
  "bits": 4
@@ -3120,7 +2636,6 @@
3120
  "group_size": 64,
3121
  "bits": 6
3122
  },
3123
- "model.layers.18.mlp.switch_mlp.activation": false,
3124
  "model.layers.19.self_attn.q_proj": {
3125
  "group_size": 64,
3126
  "bits": 4
@@ -3137,11 +2652,6 @@
3137
  "group_size": 64,
3138
  "bits": 4
3139
  },
3140
- "model.layers.19.self_attn.q_norm": false,
3141
- "model.layers.19.self_attn.k_norm": false,
3142
- "model.layers.19.self_attn.rope": false,
3143
- "model.layers.19.input_layernorm": false,
3144
- "model.layers.19.post_attention_layernorm": false,
3145
  "model.layers.19.mlp.gate": {
3146
  "group_size": 64,
3147
  "bits": 4
@@ -3158,7 +2668,6 @@
3158
  "group_size": 64,
3159
  "bits": 4
3160
  },
3161
- "model.layers.19.mlp.switch_mlp.activation": false,
3162
  "model.layers.20.self_attn.q_proj": {
3163
  "group_size": 64,
3164
  "bits": 4
@@ -3175,11 +2684,6 @@
3175
  "group_size": 64,
3176
  "bits": 4
3177
  },
3178
- "model.layers.20.self_attn.q_norm": false,
3179
- "model.layers.20.self_attn.k_norm": false,
3180
- "model.layers.20.self_attn.rope": false,
3181
- "model.layers.20.input_layernorm": false,
3182
- "model.layers.20.post_attention_layernorm": false,
3183
  "model.layers.20.mlp.gate": {
3184
  "group_size": 64,
3185
  "bits": 4
@@ -3196,7 +2700,6 @@
3196
  "group_size": 64,
3197
  "bits": 4
3198
  },
3199
- "model.layers.20.mlp.switch_mlp.activation": false,
3200
  "model.layers.21.self_attn.q_proj": {
3201
  "group_size": 64,
3202
  "bits": 4
@@ -3213,11 +2716,6 @@
3213
  "group_size": 64,
3214
  "bits": 4
3215
  },
3216
- "model.layers.21.self_attn.q_norm": false,
3217
- "model.layers.21.self_attn.k_norm": false,
3218
- "model.layers.21.self_attn.rope": false,
3219
- "model.layers.21.input_layernorm": false,
3220
- "model.layers.21.post_attention_layernorm": false,
3221
  "model.layers.21.mlp.gate": {
3222
  "group_size": 64,
3223
  "bits": 4
@@ -3234,7 +2732,6 @@
3234
  "group_size": 64,
3235
  "bits": 6
3236
  },
3237
- "model.layers.21.mlp.switch_mlp.activation": false,
3238
  "model.layers.22.self_attn.q_proj": {
3239
  "group_size": 64,
3240
  "bits": 4
@@ -3251,11 +2748,6 @@
3251
  "group_size": 64,
3252
  "bits": 4
3253
  },
3254
- "model.layers.22.self_attn.q_norm": false,
3255
- "model.layers.22.self_attn.k_norm": false,
3256
- "model.layers.22.self_attn.rope": false,
3257
- "model.layers.22.input_layernorm": false,
3258
- "model.layers.22.post_attention_layernorm": false,
3259
  "model.layers.22.mlp.gate": {
3260
  "group_size": 64,
3261
  "bits": 4
@@ -3272,7 +2764,6 @@
3272
  "group_size": 64,
3273
  "bits": 4
3274
  },
3275
- "model.layers.22.mlp.switch_mlp.activation": false,
3276
  "model.layers.23.self_attn.q_proj": {
3277
  "group_size": 64,
3278
  "bits": 4
@@ -3289,11 +2780,6 @@
3289
  "group_size": 64,
3290
  "bits": 4
3291
  },
3292
- "model.layers.23.self_attn.q_norm": false,
3293
- "model.layers.23.self_attn.k_norm": false,
3294
- "model.layers.23.self_attn.rope": false,
3295
- "model.layers.23.input_layernorm": false,
3296
- "model.layers.23.post_attention_layernorm": false,
3297
  "model.layers.23.mlp.gate": {
3298
  "group_size": 64,
3299
  "bits": 4
@@ -3310,7 +2796,6 @@
3310
  "group_size": 64,
3311
  "bits": 4
3312
  },
3313
- "model.layers.23.mlp.switch_mlp.activation": false,
3314
  "model.layers.24.self_attn.q_proj": {
3315
  "group_size": 64,
3316
  "bits": 4
@@ -3327,11 +2812,6 @@
3327
  "group_size": 64,
3328
  "bits": 4
3329
  },
3330
- "model.layers.24.self_attn.q_norm": false,
3331
- "model.layers.24.self_attn.k_norm": false,
3332
- "model.layers.24.self_attn.rope": false,
3333
- "model.layers.24.input_layernorm": false,
3334
- "model.layers.24.post_attention_layernorm": false,
3335
  "model.layers.24.mlp.gate": {
3336
  "group_size": 64,
3337
  "bits": 4
@@ -3348,7 +2828,6 @@
3348
  "group_size": 64,
3349
  "bits": 6
3350
  },
3351
- "model.layers.24.mlp.switch_mlp.activation": false,
3352
  "model.layers.25.self_attn.q_proj": {
3353
  "group_size": 64,
3354
  "bits": 4
@@ -3365,11 +2844,6 @@
3365
  "group_size": 64,
3366
  "bits": 4
3367
  },
3368
- "model.layers.25.self_attn.q_norm": false,
3369
- "model.layers.25.self_attn.k_norm": false,
3370
- "model.layers.25.self_attn.rope": false,
3371
- "model.layers.25.input_layernorm": false,
3372
- "model.layers.25.post_attention_layernorm": false,
3373
  "model.layers.25.mlp.gate": {
3374
  "group_size": 64,
3375
  "bits": 4
@@ -3386,7 +2860,6 @@
3386
  "group_size": 64,
3387
  "bits": 4
3388
  },
3389
- "model.layers.25.mlp.switch_mlp.activation": false,
3390
  "model.layers.26.self_attn.q_proj": {
3391
  "group_size": 64,
3392
  "bits": 4
@@ -3403,11 +2876,6 @@
3403
  "group_size": 64,
3404
  "bits": 4
3405
  },
3406
- "model.layers.26.self_attn.q_norm": false,
3407
- "model.layers.26.self_attn.k_norm": false,
3408
- "model.layers.26.self_attn.rope": false,
3409
- "model.layers.26.input_layernorm": false,
3410
- "model.layers.26.post_attention_layernorm": false,
3411
  "model.layers.26.mlp.gate": {
3412
  "group_size": 64,
3413
  "bits": 4
@@ -3424,7 +2892,6 @@
3424
  "group_size": 64,
3425
  "bits": 4
3426
  },
3427
- "model.layers.26.mlp.switch_mlp.activation": false,
3428
  "model.layers.27.self_attn.q_proj": {
3429
  "group_size": 64,
3430
  "bits": 4
@@ -3441,11 +2908,6 @@
3441
  "group_size": 64,
3442
  "bits": 4
3443
  },
3444
- "model.layers.27.self_attn.q_norm": false,
3445
- "model.layers.27.self_attn.k_norm": false,
3446
- "model.layers.27.self_attn.rope": false,
3447
- "model.layers.27.input_layernorm": false,
3448
- "model.layers.27.post_attention_layernorm": false,
3449
  "model.layers.27.mlp.gate": {
3450
  "group_size": 64,
3451
  "bits": 4
@@ -3462,7 +2924,6 @@
3462
  "group_size": 64,
3463
  "bits": 6
3464
  },
3465
- "model.layers.27.mlp.switch_mlp.activation": false,
3466
  "model.layers.28.self_attn.q_proj": {
3467
  "group_size": 64,
3468
  "bits": 4
@@ -3479,11 +2940,6 @@
3479
  "group_size": 64,
3480
  "bits": 4
3481
  },
3482
- "model.layers.28.self_attn.q_norm": false,
3483
- "model.layers.28.self_attn.k_norm": false,
3484
- "model.layers.28.self_attn.rope": false,
3485
- "model.layers.28.input_layernorm": false,
3486
- "model.layers.28.post_attention_layernorm": false,
3487
  "model.layers.28.mlp.gate": {
3488
  "group_size": 64,
3489
  "bits": 4
@@ -3500,7 +2956,6 @@
3500
  "group_size": 64,
3501
  "bits": 4
3502
  },
3503
- "model.layers.28.mlp.switch_mlp.activation": false,
3504
  "model.layers.29.self_attn.q_proj": {
3505
  "group_size": 64,
3506
  "bits": 4
@@ -3517,11 +2972,6 @@
3517
  "group_size": 64,
3518
  "bits": 4
3519
  },
3520
- "model.layers.29.self_attn.q_norm": false,
3521
- "model.layers.29.self_attn.k_norm": false,
3522
- "model.layers.29.self_attn.rope": false,
3523
- "model.layers.29.input_layernorm": false,
3524
- "model.layers.29.post_attention_layernorm": false,
3525
  "model.layers.29.mlp.gate": {
3526
  "group_size": 64,
3527
  "bits": 4
@@ -3538,7 +2988,6 @@
3538
  "group_size": 64,
3539
  "bits": 4
3540
  },
3541
- "model.layers.29.mlp.switch_mlp.activation": false,
3542
  "model.layers.30.self_attn.q_proj": {
3543
  "group_size": 64,
3544
  "bits": 4
@@ -3555,11 +3004,6 @@
3555
  "group_size": 64,
3556
  "bits": 4
3557
  },
3558
- "model.layers.30.self_attn.q_norm": false,
3559
- "model.layers.30.self_attn.k_norm": false,
3560
- "model.layers.30.self_attn.rope": false,
3561
- "model.layers.30.input_layernorm": false,
3562
- "model.layers.30.post_attention_layernorm": false,
3563
  "model.layers.30.mlp.gate": {
3564
  "group_size": 64,
3565
  "bits": 4
@@ -3576,7 +3020,6 @@
3576
  "group_size": 64,
3577
  "bits": 6
3578
  },
3579
- "model.layers.30.mlp.switch_mlp.activation": false,
3580
  "model.layers.31.self_attn.q_proj": {
3581
  "group_size": 64,
3582
  "bits": 4
@@ -3593,11 +3036,6 @@
3593
  "group_size": 64,
3594
  "bits": 4
3595
  },
3596
- "model.layers.31.self_attn.q_norm": false,
3597
- "model.layers.31.self_attn.k_norm": false,
3598
- "model.layers.31.self_attn.rope": false,
3599
- "model.layers.31.input_layernorm": false,
3600
- "model.layers.31.post_attention_layernorm": false,
3601
  "model.layers.31.mlp.gate": {
3602
  "group_size": 64,
3603
  "bits": 4
@@ -3614,7 +3052,6 @@
3614
  "group_size": 64,
3615
  "bits": 4
3616
  },
3617
- "model.layers.31.mlp.switch_mlp.activation": false,
3618
  "model.layers.32.self_attn.q_proj": {
3619
  "group_size": 64,
3620
  "bits": 4
@@ -3631,11 +3068,6 @@
3631
  "group_size": 64,
3632
  "bits": 4
3633
  },
3634
- "model.layers.32.self_attn.q_norm": false,
3635
- "model.layers.32.self_attn.k_norm": false,
3636
- "model.layers.32.self_attn.rope": false,
3637
- "model.layers.32.input_layernorm": false,
3638
- "model.layers.32.post_attention_layernorm": false,
3639
  "model.layers.32.mlp.gate": {
3640
  "group_size": 64,
3641
  "bits": 4
@@ -3652,7 +3084,6 @@
3652
  "group_size": 64,
3653
  "bits": 4
3654
  },
3655
- "model.layers.32.mlp.switch_mlp.activation": false,
3656
  "model.layers.33.self_attn.q_proj": {
3657
  "group_size": 64,
3658
  "bits": 4
@@ -3669,11 +3100,6 @@
3669
  "group_size": 64,
3670
  "bits": 4
3671
  },
3672
- "model.layers.33.self_attn.q_norm": false,
3673
- "model.layers.33.self_attn.k_norm": false,
3674
- "model.layers.33.self_attn.rope": false,
3675
- "model.layers.33.input_layernorm": false,
3676
- "model.layers.33.post_attention_layernorm": false,
3677
  "model.layers.33.mlp.gate": {
3678
  "group_size": 64,
3679
  "bits": 4
@@ -3690,7 +3116,6 @@
3690
  "group_size": 64,
3691
  "bits": 6
3692
  },
3693
- "model.layers.33.mlp.switch_mlp.activation": false,
3694
  "model.layers.34.self_attn.q_proj": {
3695
  "group_size": 64,
3696
  "bits": 4
@@ -3707,11 +3132,6 @@
3707
  "group_size": 64,
3708
  "bits": 4
3709
  },
3710
- "model.layers.34.self_attn.q_norm": false,
3711
- "model.layers.34.self_attn.k_norm": false,
3712
- "model.layers.34.self_attn.rope": false,
3713
- "model.layers.34.input_layernorm": false,
3714
- "model.layers.34.post_attention_layernorm": false,
3715
  "model.layers.34.mlp.gate": {
3716
  "group_size": 64,
3717
  "bits": 4
@@ -3728,7 +3148,6 @@
3728
  "group_size": 64,
3729
  "bits": 4
3730
  },
3731
- "model.layers.34.mlp.switch_mlp.activation": false,
3732
  "model.layers.35.self_attn.q_proj": {
3733
  "group_size": 64,
3734
  "bits": 4
@@ -3745,11 +3164,6 @@
3745
  "group_size": 64,
3746
  "bits": 4
3747
  },
3748
- "model.layers.35.self_attn.q_norm": false,
3749
- "model.layers.35.self_attn.k_norm": false,
3750
- "model.layers.35.self_attn.rope": false,
3751
- "model.layers.35.input_layernorm": false,
3752
- "model.layers.35.post_attention_layernorm": false,
3753
  "model.layers.35.mlp.gate": {
3754
  "group_size": 64,
3755
  "bits": 4
@@ -3766,7 +3180,6 @@
3766
  "group_size": 64,
3767
  "bits": 4
3768
  },
3769
- "model.layers.35.mlp.switch_mlp.activation": false,
3770
  "model.layers.36.self_attn.q_proj": {
3771
  "group_size": 64,
3772
  "bits": 4
@@ -3783,11 +3196,6 @@
3783
  "group_size": 64,
3784
  "bits": 4
3785
  },
3786
- "model.layers.36.self_attn.q_norm": false,
3787
- "model.layers.36.self_attn.k_norm": false,
3788
- "model.layers.36.self_attn.rope": false,
3789
- "model.layers.36.input_layernorm": false,
3790
- "model.layers.36.post_attention_layernorm": false,
3791
  "model.layers.36.mlp.gate": {
3792
  "group_size": 64,
3793
  "bits": 4
@@ -3804,7 +3212,6 @@
3804
  "group_size": 64,
3805
  "bits": 6
3806
  },
3807
- "model.layers.36.mlp.switch_mlp.activation": false,
3808
  "model.layers.37.self_attn.q_proj": {
3809
  "group_size": 64,
3810
  "bits": 4
@@ -3821,11 +3228,6 @@
3821
  "group_size": 64,
3822
  "bits": 4
3823
  },
3824
- "model.layers.37.self_attn.q_norm": false,
3825
- "model.layers.37.self_attn.k_norm": false,
3826
- "model.layers.37.self_attn.rope": false,
3827
- "model.layers.37.input_layernorm": false,
3828
- "model.layers.37.post_attention_layernorm": false,
3829
  "model.layers.37.mlp.gate": {
3830
  "group_size": 64,
3831
  "bits": 4
@@ -3842,7 +3244,6 @@
3842
  "group_size": 64,
3843
  "bits": 4
3844
  },
3845
- "model.layers.37.mlp.switch_mlp.activation": false,
3846
  "model.layers.38.self_attn.q_proj": {
3847
  "group_size": 64,
3848
  "bits": 4
@@ -3859,11 +3260,6 @@
3859
  "group_size": 64,
3860
  "bits": 4
3861
  },
3862
- "model.layers.38.self_attn.q_norm": false,
3863
- "model.layers.38.self_attn.k_norm": false,
3864
- "model.layers.38.self_attn.rope": false,
3865
- "model.layers.38.input_layernorm": false,
3866
- "model.layers.38.post_attention_layernorm": false,
3867
  "model.layers.38.mlp.gate": {
3868
  "group_size": 64,
3869
  "bits": 4
@@ -3880,7 +3276,6 @@
3880
  "group_size": 64,
3881
  "bits": 4
3882
  },
3883
- "model.layers.38.mlp.switch_mlp.activation": false,
3884
  "model.layers.39.self_attn.q_proj": {
3885
  "group_size": 64,
3886
  "bits": 4
@@ -3897,11 +3292,6 @@
3897
  "group_size": 64,
3898
  "bits": 4
3899
  },
3900
- "model.layers.39.self_attn.q_norm": false,
3901
- "model.layers.39.self_attn.k_norm": false,
3902
- "model.layers.39.self_attn.rope": false,
3903
- "model.layers.39.input_layernorm": false,
3904
- "model.layers.39.post_attention_layernorm": false,
3905
  "model.layers.39.mlp.gate": {
3906
  "group_size": 64,
3907
  "bits": 4
@@ -3918,7 +3308,6 @@
3918
  "group_size": 64,
3919
  "bits": 6
3920
  },
3921
- "model.layers.39.mlp.switch_mlp.activation": false,
3922
  "model.layers.40.self_attn.q_proj": {
3923
  "group_size": 64,
3924
  "bits": 4
@@ -3935,11 +3324,6 @@
3935
  "group_size": 64,
3936
  "bits": 4
3937
  },
3938
- "model.layers.40.self_attn.q_norm": false,
3939
- "model.layers.40.self_attn.k_norm": false,
3940
- "model.layers.40.self_attn.rope": false,
3941
- "model.layers.40.input_layernorm": false,
3942
- "model.layers.40.post_attention_layernorm": false,
3943
  "model.layers.40.mlp.gate": {
3944
  "group_size": 64,
3945
  "bits": 4
@@ -3956,7 +3340,6 @@
3956
  "group_size": 64,
3957
  "bits": 4
3958
  },
3959
- "model.layers.40.mlp.switch_mlp.activation": false,
3960
  "model.layers.41.self_attn.q_proj": {
3961
  "group_size": 64,
3962
  "bits": 4
@@ -3973,11 +3356,6 @@
3973
  "group_size": 64,
3974
  "bits": 4
3975
  },
3976
- "model.layers.41.self_attn.q_norm": false,
3977
- "model.layers.41.self_attn.k_norm": false,
3978
- "model.layers.41.self_attn.rope": false,
3979
- "model.layers.41.input_layernorm": false,
3980
- "model.layers.41.post_attention_layernorm": false,
3981
  "model.layers.41.mlp.gate": {
3982
  "group_size": 64,
3983
  "bits": 4
@@ -3994,7 +3372,6 @@
3994
  "group_size": 64,
3995
  "bits": 4
3996
  },
3997
- "model.layers.41.mlp.switch_mlp.activation": false,
3998
  "model.layers.42.self_attn.q_proj": {
3999
  "group_size": 64,
4000
  "bits": 4
@@ -4011,11 +3388,6 @@
4011
  "group_size": 64,
4012
  "bits": 4
4013
  },
4014
- "model.layers.42.self_attn.q_norm": false,
4015
- "model.layers.42.self_attn.k_norm": false,
4016
- "model.layers.42.self_attn.rope": false,
4017
- "model.layers.42.input_layernorm": false,
4018
- "model.layers.42.post_attention_layernorm": false,
4019
  "model.layers.42.mlp.gate": {
4020
  "group_size": 64,
4021
  "bits": 4
@@ -4032,7 +3404,6 @@
4032
  "group_size": 64,
4033
  "bits": 6
4034
  },
4035
- "model.layers.42.mlp.switch_mlp.activation": false,
4036
  "model.layers.43.self_attn.q_proj": {
4037
  "group_size": 64,
4038
  "bits": 4
@@ -4049,11 +3420,6 @@
4049
  "group_size": 64,
4050
  "bits": 4
4051
  },
4052
- "model.layers.43.self_attn.q_norm": false,
4053
- "model.layers.43.self_attn.k_norm": false,
4054
- "model.layers.43.self_attn.rope": false,
4055
- "model.layers.43.input_layernorm": false,
4056
- "model.layers.43.post_attention_layernorm": false,
4057
  "model.layers.43.mlp.gate": {
4058
  "group_size": 64,
4059
  "bits": 4
@@ -4070,7 +3436,6 @@
4070
  "group_size": 64,
4071
  "bits": 4
4072
  },
4073
- "model.layers.43.mlp.switch_mlp.activation": false,
4074
  "model.layers.44.self_attn.q_proj": {
4075
  "group_size": 64,
4076
  "bits": 4
@@ -4087,11 +3452,6 @@
4087
  "group_size": 64,
4088
  "bits": 4
4089
  },
4090
- "model.layers.44.self_attn.q_norm": false,
4091
- "model.layers.44.self_attn.k_norm": false,
4092
- "model.layers.44.self_attn.rope": false,
4093
- "model.layers.44.input_layernorm": false,
4094
- "model.layers.44.post_attention_layernorm": false,
4095
  "model.layers.44.mlp.gate": {
4096
  "group_size": 64,
4097
  "bits": 4
@@ -4108,7 +3468,6 @@
4108
  "group_size": 64,
4109
  "bits": 4
4110
  },
4111
- "model.layers.44.mlp.switch_mlp.activation": false,
4112
  "model.layers.45.self_attn.q_proj": {
4113
  "group_size": 64,
4114
  "bits": 4
@@ -4125,11 +3484,6 @@
4125
  "group_size": 64,
4126
  "bits": 4
4127
  },
4128
- "model.layers.45.self_attn.q_norm": false,
4129
- "model.layers.45.self_attn.k_norm": false,
4130
- "model.layers.45.self_attn.rope": false,
4131
- "model.layers.45.input_layernorm": false,
4132
- "model.layers.45.post_attention_layernorm": false,
4133
  "model.layers.45.mlp.gate": {
4134
  "group_size": 64,
4135
  "bits": 4
@@ -4146,7 +3500,6 @@
4146
  "group_size": 64,
4147
  "bits": 6
4148
  },
4149
- "model.layers.45.mlp.switch_mlp.activation": false,
4150
  "model.layers.46.self_attn.q_proj": {
4151
  "group_size": 64,
4152
  "bits": 4
@@ -4163,11 +3516,6 @@
4163
  "group_size": 64,
4164
  "bits": 4
4165
  },
4166
- "model.layers.46.self_attn.q_norm": false,
4167
- "model.layers.46.self_attn.k_norm": false,
4168
- "model.layers.46.self_attn.rope": false,
4169
- "model.layers.46.input_layernorm": false,
4170
- "model.layers.46.post_attention_layernorm": false,
4171
  "model.layers.46.mlp.gate": {
4172
  "group_size": 64,
4173
  "bits": 4
@@ -4184,7 +3532,6 @@
4184
  "group_size": 64,
4185
  "bits": 4
4186
  },
4187
- "model.layers.46.mlp.switch_mlp.activation": false,
4188
  "model.layers.47.self_attn.q_proj": {
4189
  "group_size": 64,
4190
  "bits": 4
@@ -4201,11 +3548,6 @@
4201
  "group_size": 64,
4202
  "bits": 4
4203
  },
4204
- "model.layers.47.self_attn.q_norm": false,
4205
- "model.layers.47.self_attn.k_norm": false,
4206
- "model.layers.47.self_attn.rope": false,
4207
- "model.layers.47.input_layernorm": false,
4208
- "model.layers.47.post_attention_layernorm": false,
4209
  "model.layers.47.mlp.gate": {
4210
  "group_size": 64,
4211
  "bits": 4
@@ -4222,7 +3564,6 @@
4222
  "group_size": 64,
4223
  "bits": 4
4224
  },
4225
- "model.layers.47.mlp.switch_mlp.activation": false,
4226
  "model.layers.48.self_attn.q_proj": {
4227
  "group_size": 64,
4228
  "bits": 4
@@ -4239,11 +3580,6 @@
4239
  "group_size": 64,
4240
  "bits": 4
4241
  },
4242
- "model.layers.48.self_attn.q_norm": false,
4243
- "model.layers.48.self_attn.k_norm": false,
4244
- "model.layers.48.self_attn.rope": false,
4245
- "model.layers.48.input_layernorm": false,
4246
- "model.layers.48.post_attention_layernorm": false,
4247
  "model.layers.48.mlp.gate": {
4248
  "group_size": 64,
4249
  "bits": 4
@@ -4260,7 +3596,6 @@
4260
  "group_size": 64,
4261
  "bits": 6
4262
  },
4263
- "model.layers.48.mlp.switch_mlp.activation": false,
4264
  "model.layers.49.self_attn.q_proj": {
4265
  "group_size": 64,
4266
  "bits": 4
@@ -4277,11 +3612,6 @@
4277
  "group_size": 64,
4278
  "bits": 4
4279
  },
4280
- "model.layers.49.self_attn.q_norm": false,
4281
- "model.layers.49.self_attn.k_norm": false,
4282
- "model.layers.49.self_attn.rope": false,
4283
- "model.layers.49.input_layernorm": false,
4284
- "model.layers.49.post_attention_layernorm": false,
4285
  "model.layers.49.mlp.gate": {
4286
  "group_size": 64,
4287
  "bits": 4
@@ -4298,7 +3628,6 @@
4298
  "group_size": 64,
4299
  "bits": 4
4300
  },
4301
- "model.layers.49.mlp.switch_mlp.activation": false,
4302
  "model.layers.50.self_attn.q_proj": {
4303
  "group_size": 64,
4304
  "bits": 4
@@ -4315,11 +3644,6 @@
4315
  "group_size": 64,
4316
  "bits": 4
4317
  },
4318
- "model.layers.50.self_attn.q_norm": false,
4319
- "model.layers.50.self_attn.k_norm": false,
4320
- "model.layers.50.self_attn.rope": false,
4321
- "model.layers.50.input_layernorm": false,
4322
- "model.layers.50.post_attention_layernorm": false,
4323
  "model.layers.50.mlp.gate": {
4324
  "group_size": 64,
4325
  "bits": 4
@@ -4336,7 +3660,6 @@
4336
  "group_size": 64,
4337
  "bits": 4
4338
  },
4339
- "model.layers.50.mlp.switch_mlp.activation": false,
4340
  "model.layers.51.self_attn.q_proj": {
4341
  "group_size": 64,
4342
  "bits": 4
@@ -4353,11 +3676,6 @@
4353
  "group_size": 64,
4354
  "bits": 4
4355
  },
4356
- "model.layers.51.self_attn.q_norm": false,
4357
- "model.layers.51.self_attn.k_norm": false,
4358
- "model.layers.51.self_attn.rope": false,
4359
- "model.layers.51.input_layernorm": false,
4360
- "model.layers.51.post_attention_layernorm": false,
4361
  "model.layers.51.mlp.gate": {
4362
  "group_size": 64,
4363
  "bits": 4
@@ -4374,7 +3692,6 @@
4374
  "group_size": 64,
4375
  "bits": 6
4376
  },
4377
- "model.layers.51.mlp.switch_mlp.activation": false,
4378
  "model.layers.52.self_attn.q_proj": {
4379
  "group_size": 64,
4380
  "bits": 4
@@ -4391,11 +3708,6 @@
4391
  "group_size": 64,
4392
  "bits": 4
4393
  },
4394
- "model.layers.52.self_attn.q_norm": false,
4395
- "model.layers.52.self_attn.k_norm": false,
4396
- "model.layers.52.self_attn.rope": false,
4397
- "model.layers.52.input_layernorm": false,
4398
- "model.layers.52.post_attention_layernorm": false,
4399
  "model.layers.52.mlp.gate": {
4400
  "group_size": 64,
4401
  "bits": 4
@@ -4412,7 +3724,6 @@
4412
  "group_size": 64,
4413
  "bits": 4
4414
  },
4415
- "model.layers.52.mlp.switch_mlp.activation": false,
4416
  "model.layers.53.self_attn.q_proj": {
4417
  "group_size": 64,
4418
  "bits": 4
@@ -4429,11 +3740,6 @@
4429
  "group_size": 64,
4430
  "bits": 4
4431
  },
4432
- "model.layers.53.self_attn.q_norm": false,
4433
- "model.layers.53.self_attn.k_norm": false,
4434
- "model.layers.53.self_attn.rope": false,
4435
- "model.layers.53.input_layernorm": false,
4436
- "model.layers.53.post_attention_layernorm": false,
4437
  "model.layers.53.mlp.gate": {
4438
  "group_size": 64,
4439
  "bits": 4
@@ -4450,7 +3756,6 @@
4450
  "group_size": 64,
4451
  "bits": 4
4452
  },
4453
- "model.layers.53.mlp.switch_mlp.activation": false,
4454
  "model.layers.54.self_attn.q_proj": {
4455
  "group_size": 64,
4456
  "bits": 4
@@ -4467,11 +3772,6 @@
4467
  "group_size": 64,
4468
  "bits": 4
4469
  },
4470
- "model.layers.54.self_attn.q_norm": false,
4471
- "model.layers.54.self_attn.k_norm": false,
4472
- "model.layers.54.self_attn.rope": false,
4473
- "model.layers.54.input_layernorm": false,
4474
- "model.layers.54.post_attention_layernorm": false,
4475
  "model.layers.54.mlp.gate": {
4476
  "group_size": 64,
4477
  "bits": 4
@@ -4488,7 +3788,6 @@
4488
  "group_size": 64,
4489
  "bits": 6
4490
  },
4491
- "model.layers.54.mlp.switch_mlp.activation": false,
4492
  "model.layers.55.self_attn.q_proj": {
4493
  "group_size": 64,
4494
  "bits": 4
@@ -4505,11 +3804,6 @@
4505
  "group_size": 64,
4506
  "bits": 4
4507
  },
4508
- "model.layers.55.self_attn.q_norm": false,
4509
- "model.layers.55.self_attn.k_norm": false,
4510
- "model.layers.55.self_attn.rope": false,
4511
- "model.layers.55.input_layernorm": false,
4512
- "model.layers.55.post_attention_layernorm": false,
4513
  "model.layers.55.mlp.gate": {
4514
  "group_size": 64,
4515
  "bits": 4
@@ -4526,7 +3820,6 @@
4526
  "group_size": 64,
4527
  "bits": 6
4528
  },
4529
- "model.layers.55.mlp.switch_mlp.activation": false,
4530
  "model.layers.56.self_attn.q_proj": {
4531
  "group_size": 64,
4532
  "bits": 4
@@ -4543,11 +3836,6 @@
4543
  "group_size": 64,
4544
  "bits": 4
4545
  },
4546
- "model.layers.56.self_attn.q_norm": false,
4547
- "model.layers.56.self_attn.k_norm": false,
4548
- "model.layers.56.self_attn.rope": false,
4549
- "model.layers.56.input_layernorm": false,
4550
- "model.layers.56.post_attention_layernorm": false,
4551
  "model.layers.56.mlp.gate": {
4552
  "group_size": 64,
4553
  "bits": 4
@@ -4564,7 +3852,6 @@
4564
  "group_size": 64,
4565
  "bits": 6
4566
  },
4567
- "model.layers.56.mlp.switch_mlp.activation": false,
4568
  "model.layers.57.self_attn.q_proj": {
4569
  "group_size": 64,
4570
  "bits": 4
@@ -4581,11 +3868,6 @@
4581
  "group_size": 64,
4582
  "bits": 4
4583
  },
4584
- "model.layers.57.self_attn.q_norm": false,
4585
- "model.layers.57.self_attn.k_norm": false,
4586
- "model.layers.57.self_attn.rope": false,
4587
- "model.layers.57.input_layernorm": false,
4588
- "model.layers.57.post_attention_layernorm": false,
4589
  "model.layers.57.mlp.gate": {
4590
  "group_size": 64,
4591
  "bits": 4
@@ -4602,7 +3884,6 @@
4602
  "group_size": 64,
4603
  "bits": 6
4604
  },
4605
- "model.layers.57.mlp.switch_mlp.activation": false,
4606
  "model.layers.58.self_attn.q_proj": {
4607
  "group_size": 64,
4608
  "bits": 4
@@ -4619,11 +3900,6 @@
4619
  "group_size": 64,
4620
  "bits": 4
4621
  },
4622
- "model.layers.58.self_attn.q_norm": false,
4623
- "model.layers.58.self_attn.k_norm": false,
4624
- "model.layers.58.self_attn.rope": false,
4625
- "model.layers.58.input_layernorm": false,
4626
- "model.layers.58.post_attention_layernorm": false,
4627
  "model.layers.58.mlp.gate": {
4628
  "group_size": 64,
4629
  "bits": 4
@@ -4640,7 +3916,6 @@
4640
  "group_size": 64,
4641
  "bits": 6
4642
  },
4643
- "model.layers.58.mlp.switch_mlp.activation": false,
4644
  "model.layers.59.self_attn.q_proj": {
4645
  "group_size": 64,
4646
  "bits": 4
@@ -4657,11 +3932,6 @@
4657
  "group_size": 64,
4658
  "bits": 4
4659
  },
4660
- "model.layers.59.self_attn.q_norm": false,
4661
- "model.layers.59.self_attn.k_norm": false,
4662
- "model.layers.59.self_attn.rope": false,
4663
- "model.layers.59.input_layernorm": false,
4664
- "model.layers.59.post_attention_layernorm": false,
4665
  "model.layers.59.mlp.gate": {
4666
  "group_size": 64,
4667
  "bits": 4
@@ -4678,7 +3948,6 @@
4678
  "group_size": 64,
4679
  "bits": 6
4680
  },
4681
- "model.layers.59.mlp.switch_mlp.activation": false,
4682
  "model.layers.60.self_attn.q_proj": {
4683
  "group_size": 64,
4684
  "bits": 4
@@ -4695,11 +3964,6 @@
4695
  "group_size": 64,
4696
  "bits": 4
4697
  },
4698
- "model.layers.60.self_attn.q_norm": false,
4699
- "model.layers.60.self_attn.k_norm": false,
4700
- "model.layers.60.self_attn.rope": false,
4701
- "model.layers.60.input_layernorm": false,
4702
- "model.layers.60.post_attention_layernorm": false,
4703
  "model.layers.60.mlp.gate": {
4704
  "group_size": 64,
4705
  "bits": 4
@@ -4716,7 +3980,6 @@
4716
  "group_size": 64,
4717
  "bits": 6
4718
  },
4719
- "model.layers.60.mlp.switch_mlp.activation": false,
4720
  "model.layers.61.self_attn.q_proj": {
4721
  "group_size": 64,
4722
  "bits": 4
@@ -4733,11 +3996,6 @@
4733
  "group_size": 64,
4734
  "bits": 4
4735
  },
4736
- "model.layers.61.self_attn.q_norm": false,
4737
- "model.layers.61.self_attn.k_norm": false,
4738
- "model.layers.61.self_attn.rope": false,
4739
- "model.layers.61.input_layernorm": false,
4740
- "model.layers.61.post_attention_layernorm": false,
4741
  "model.layers.61.mlp.gate": {
4742
  "group_size": 64,
4743
  "bits": 4
@@ -4754,8 +4012,6 @@
4754
  "group_size": 64,
4755
  "bits": 6
4756
  },
4757
- "model.layers.61.mlp.switch_mlp.activation": false,
4758
- "model.norm": false,
4759
  "lm_head": {
4760
  "group_size": 64,
4761
  "bits": 6
 
  "quantization": {
  "group_size": 64,
  "bits": 4,
+ "mode": "affine",
  "model.embed_tokens": {
  "group_size": 64,
  "bits": 4
[per-module "quantization" entries for model.layers.0 through lm_head: "group_size": 64 throughout, "bits": 4 for most modules and "bits": 6 for the blocks singled out above and for lm_head.]
  "quantization_config": {
  "group_size": 64,
  "bits": 4,
+ "mode": "affine",
  "model.embed_tokens": {
  "group_size": 64,
  "bits": 4
 
[per-module "quantization_config" entries for model.layers.0 through model.layers.52, matching the "quantization" block above]
  "model.layers.52.mlp.gate": {
  "group_size": 64,
3713
  "bits": 4
 
3724
  "group_size": 64,
3725
  "bits": 4
3726
  },
 
3727
  "model.layers.53.self_attn.q_proj": {
3728
  "group_size": 64,
3729
  "bits": 4
 
3740
  "group_size": 64,
3741
  "bits": 4
3742
  },
 
 
 
 
 
3743
  "model.layers.53.mlp.gate": {
3744
  "group_size": 64,
3745
  "bits": 4
 
3756
  "group_size": 64,
3757
  "bits": 4
3758
  },
 
3759
  "model.layers.54.self_attn.q_proj": {
3760
  "group_size": 64,
3761
  "bits": 4
 
3772
  "group_size": 64,
3773
  "bits": 4
3774
  },
 
 
 
 
 
3775
  "model.layers.54.mlp.gate": {
3776
  "group_size": 64,
3777
  "bits": 4
 
3788
  "group_size": 64,
3789
  "bits": 6
3790
  },
 
3791
  "model.layers.55.self_attn.q_proj": {
3792
  "group_size": 64,
3793
  "bits": 4
 
3804
  "group_size": 64,
3805
  "bits": 4
3806
  },
 
 
 
 
 
3807
  "model.layers.55.mlp.gate": {
3808
  "group_size": 64,
3809
  "bits": 4
 
3820
  "group_size": 64,
3821
  "bits": 6
3822
  },
 
3823
  "model.layers.56.self_attn.q_proj": {
3824
  "group_size": 64,
3825
  "bits": 4
 
3836
  "group_size": 64,
3837
  "bits": 4
3838
  },
 
 
 
 
 
3839
  "model.layers.56.mlp.gate": {
3840
  "group_size": 64,
3841
  "bits": 4
 
3852
  "group_size": 64,
3853
  "bits": 6
3854
  },
 
3855
  "model.layers.57.self_attn.q_proj": {
3856
  "group_size": 64,
3857
  "bits": 4
 
3868
  "group_size": 64,
3869
  "bits": 4
3870
  },
 
 
 
 
 
3871
  "model.layers.57.mlp.gate": {
3872
  "group_size": 64,
3873
  "bits": 4
 
3884
  "group_size": 64,
3885
  "bits": 6
3886
  },
 
3887
  "model.layers.58.self_attn.q_proj": {
3888
  "group_size": 64,
3889
  "bits": 4
 
3900
  "group_size": 64,
3901
  "bits": 4
3902
  },
 
 
 
 
 
3903
  "model.layers.58.mlp.gate": {
3904
  "group_size": 64,
3905
  "bits": 4
 
3916
  "group_size": 64,
3917
  "bits": 6
3918
  },
 
3919
  "model.layers.59.self_attn.q_proj": {
3920
  "group_size": 64,
3921
  "bits": 4
 
3932
  "group_size": 64,
3933
  "bits": 4
3934
  },
 
 
 
 
 
3935
  "model.layers.59.mlp.gate": {
3936
  "group_size": 64,
3937
  "bits": 4
 
3948
  "group_size": 64,
3949
  "bits": 6
3950
  },
 
3951
  "model.layers.60.self_attn.q_proj": {
3952
  "group_size": 64,
3953
  "bits": 4
 
3964
  "group_size": 64,
3965
  "bits": 4
3966
  },
 
 
 
 
 
3967
  "model.layers.60.mlp.gate": {
3968
  "group_size": 64,
3969
  "bits": 4
 
3980
  "group_size": 64,
3981
  "bits": 6
3982
  },
 
3983
  "model.layers.61.self_attn.q_proj": {
3984
  "group_size": 64,
3985
  "bits": 4
 
3996
  "group_size": 64,
3997
  "bits": 4
3998
  },
 
 
 
 
 
3999
  "model.layers.61.mlp.gate": {
4000
  "group_size": 64,
4001
  "bits": 4
 
4012
  "group_size": 64,
4013
  "bits": 6
4014
  },
 
 
4015
  "lm_head": {
4016
  "group_size": 64,
4017
  "bits": 6
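As a quick sanity check after downloading the converted model, a minimal sketch for tallying these overrides; it assumes the per-layer entries sit under the top-level "quantization" key of config.json, as the fragments above suggest:

```python
# Count 4-bit vs 6-bit entries recorded in the converted model's config.json.
# Assumption: per-layer overrides live under the "quantization" key, as in the
# excerpt above. The counts are per tensor and not weighted by tensor size,
# so they will not reproduce the 4.819 bits-per-weight figure exactly.
import json
from collections import Counter

with open("config.json") as f:
    config = json.load(f)

bit_counts = Counter(
    spec["bits"]
    for spec in config.get("quantization", {}).values()
    if isinstance(spec, dict) and "bits" in spec
)
print(bit_counts)  # e.g. Counter({4: ..., 6: ...}) for a mixed_4_6 conversion
```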
qwen3coder_tool_parser.py CHANGED
@@ -1,34 +1,30 @@
 # SPDX-License-Identifier: Apache-2.0
-
 import json
-import re
 import uuid
 from collections.abc import Sequence
-from typing import Union, Optional, Any, List, Dict
-from enum import Enum
-
-from vllm.entrypoints.openai.protocol import (
-    ChatCompletionRequest,
-    ChatCompletionToolsParam,
-    DeltaMessage,
-    DeltaToolCall,
-    DeltaFunctionCall,
-    ExtractedToolCallInformation,
-    FunctionCall,
-    ToolCall,
-)
 from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import (
-    ToolParser,
-    ToolParserManager,
-)
 from vllm.logger import init_logger
 from vllm.transformers_utils.tokenizer import AnyTokenizer
 
 logger = init_logger(__name__)
 
 
-@ToolParserManager.register_module("qwen3_xml")
-class Qwen3XMLToolParser(ToolParser):
 
     def __init__(self, tokenizer: AnyTokenizer):
         super().__init__(tokenizer)
@@ -52,34 +48,32 @@ class Qwen3XMLToolParser(ToolParser):
 
         # Regex patterns
         self.tool_call_complete_regex = re.compile(
-            r"<tool_call>(.*?)</tool_call>", re.DOTALL
-        )
         self.tool_call_regex = re.compile(
-            r"<tool_call>(.*?)</tool_call>|<tool_call>(.*?)$", re.DOTALL
-        )
         self.tool_call_function_regex = re.compile(
-            r"<function=(.*?)</function>|<function=(.*)$", re.DOTALL
-        )
         self.tool_call_parameter_regex = re.compile(
-            r"<parameter=(.*?)</parameter>|<parameter=(.*?)$", re.DOTALL
-        )
 
         if not self.model_tokenizer:
             raise ValueError(
                 "The model tokenizer must be passed to the ToolParser "
-                "constructor during construction."
-            )
 
-        self.tool_call_start_token_id = self.vocab.get(self.tool_call_start_token)
         self.tool_call_end_token_id = self.vocab.get(self.tool_call_end_token)
 
         if self.tool_call_start_token_id is None or self.tool_call_end_token_id is None:
             raise RuntimeError(
                 "Qwen3 XML Tool parser could not locate tool call start/end "
-                "tokens in the tokenizer!"
-            )
 
-        logger.info(f"vLLM Successfully import tool parser {self.__class__.__name__} !")
 
 
     def _generate_tool_call_id(self) -> str:
         """Generate a unique tool call ID."""
@@ -100,130 +94,130 @@ class Qwen3XMLToolParser(ToolParser):
100
  self.accumulated_text = ""
101
  self.json_started = False
102
  self.json_closed = False
103
-
104
- def _parse_xml_function_call(
105
- self, function_call_str: str, tools: Optional[list[ChatCompletionToolsParam]]
106
- ) -> Optional[ToolCall]:
107
- def get_arguments_config(func_name: str) -> dict:
108
- if tools is None:
109
- return {}
110
- for config in tools:
111
- if not hasattr(config, "type") or not (
112
- hasattr(config, "function") and hasattr(config.function, "name")
113
- ):
114
- continue
115
- if config.type == "function" and config.function.name == func_name:
116
- if not hasattr(config.function, "parameters"):
117
- return {}
118
- params = config.function.parameters
119
- if isinstance(params, dict) and "properties" in params:
120
- return params["properties"]
121
- elif isinstance(params, dict):
122
- return params
123
- else:
124
- return {}
125
- logger.warning(f"Tool '{func_name}' is not defined in the tools list.")
126
  return {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
 
128
- def convert_param_value(
129
- param_value: str, param_name: str, param_config: dict, func_name: str
130
- ) -> Any:
131
- # Handle null value for any type
132
- if param_value.lower() == "null":
133
- return None
134
-
135
- if param_name not in param_config:
136
- if param_config != {}:
137
- logger.warning(
138
- f"Parsed parameter '{param_name}' is not defined in the tool "
139
- f"parameters for tool '{func_name}', directly returning the string value."
140
- )
141
- return param_value
142
 
143
- if (
144
- isinstance(param_config[param_name], dict)
145
- and "type" in param_config[param_name]
146
- ):
147
- param_type = str(param_config[param_name]["type"]).strip().lower()
148
- else:
149
- param_type = "string"
150
- if param_type in ["string", "str", "text", "varchar", "char", "enum"]:
151
- return param_value
152
- elif (
153
- param_type.startswith("int")
154
- or param_type.startswith("uint")
155
- or param_type.startswith("long")
156
- or param_type.startswith("short")
157
- or param_type.startswith("unsigned")
158
- ):
159
- try:
160
- param_value = int(param_value)
161
- except:
162
- logger.warning(
163
- f"Parsed value '{param_value}' of parameter '{param_name}' is not an integer in tool "
164
- f"'{func_name}', degenerating to string."
165
- )
166
- return param_value
167
- elif param_type.startswith("num") or param_type.startswith("float"):
168
- try:
169
- float_param_value = float(param_value)
170
- param_value = float_param_value if float_param_value - int(float_param_value) != 0 else int(float_param_value)
171
- except:
172
- logger.warning(
173
- f"Parsed value '{param_value}' of parameter '{param_name}' is not a float in tool "
174
- f"'{func_name}', degenerating to string."
175
- )
176
- return param_value
177
- elif param_type in ["boolean", "bool", "binary"]:
178
- param_value = param_value.lower()
179
- if param_value not in ["true", "false"]:
180
- logger.warning(
181
- f"Parsed value '{param_value}' of parameter '{param_name}' is not a boolean (`true` of `false`) in tool '{func_name}', degenerating to false."
182
- )
183
- return param_value == "true"
184
- else:
185
- if param_type == "object" or param_type.startswith("dict"):
186
- try:
187
- param_value = json.loads(param_value)
188
- return param_value
189
- except:
190
- logger.warning(
191
- f"Parsed value '{param_value}' of parameter '{param_name}' is not a valid JSON object in tool "
192
- f"'{func_name}', will try other methods to parse it."
193
- )
194
  try:
195
- param_value = eval(param_value)
 
196
  except:
197
  logger.warning(
198
- f"Parsed value '{param_value}' of parameter '{param_name}' cannot be converted via Python `eval()` in tool '{func_name}', degenerating to string."
199
- )
200
- return param_value
 
 
 
 
 
 
 
 
 
 
 
201
 
202
  # Extract function name
203
  end_index = function_call_str.index(">")
204
  function_name = function_call_str[:end_index]
205
- param_config = get_arguments_config(function_name)
206
- parameters = function_call_str[end_index + 1 :]
207
  param_dict = {}
208
- for match in self.tool_call_parameter_regex.findall(parameters):
209
- match_text = match[0] if match[0] else match[1]
210
  idx = match_text.index(">")
211
  param_name = match_text[:idx]
212
- param_value = str(match_text[idx + 1 :])
213
  # Remove prefix and trailing \n
214
  if param_value.startswith("\n"):
215
  param_value = param_value[1:]
216
  if param_value.endswith("\n"):
217
  param_value = param_value[:-1]
218
 
219
- param_dict[param_name] = convert_param_value(
220
- param_value, param_name, param_config, function_name
221
- )
222
  return ToolCall(
223
  type="function",
224
- function=FunctionCall(
225
- name=function_name, arguments=json.dumps(param_dict, ensure_ascii=False)
226
- ),
227
  )
228
 
229
  def _get_function_calls(self, model_output: str) -> List[str]:
@@ -239,7 +233,8 @@ class Qwen3XMLToolParser(ToolParser):
239
 
240
  raw_function_calls = []
241
  for tool_call in raw_tool_calls:
242
- raw_function_calls.extend(self.tool_call_function_regex.findall(tool_call))
 
243
 
244
  function_calls = [
245
  match[0] if match[0] else match[1] for match in raw_function_calls
@@ -253,16 +248,16 @@ class Qwen3XMLToolParser(ToolParser):
253
  ) -> ExtractedToolCallInformation:
254
  # Quick check to avoid unnecessary processing
255
  if self.tool_call_prefix not in model_output:
256
- return ExtractedToolCallInformation(
257
- tools_called=False, tool_calls=[], content=model_output
258
- )
259
 
260
  try:
261
  function_calls = self._get_function_calls(model_output)
262
  if len(function_calls) == 0:
263
- return ExtractedToolCallInformation(
264
- tools_called=False, tool_calls=[], content=model_output
265
- )
266
 
267
  tool_calls = [
268
  self._parse_xml_function_call(function_call_str, request.tools)
@@ -273,20 +268,17 @@ class Qwen3XMLToolParser(ToolParser):
273
  self.prev_tool_call_arr.clear() # Clear previous calls
274
  for tool_call in tool_calls:
275
  if tool_call:
276
- self.prev_tool_call_arr.append(
277
- {
278
- "name": tool_call.function.name,
279
- "arguments": tool_call.function.arguments,
280
- }
281
- )
282
 
283
  # Extract content before tool calls
284
  content_index = model_output.find(self.tool_call_start_token)
285
- content_index = (
286
- content_index
287
- if content_index >= 0
288
- else model_output.find(self.tool_call_prefix)
289
- )
290
  content = model_output[:content_index] # .rstrip()
291
 
292
  return ExtractedToolCallInformation(
@@ -297,9 +289,9 @@ class Qwen3XMLToolParser(ToolParser):
297
 
298
  except Exception:
299
  logger.exception("Error in extracting tool call from response.")
300
- return ExtractedToolCallInformation(
301
- tools_called=False, tool_calls=[], content=model_output
302
- )
303
 
304
  def extract_tool_calls_streaming(
305
  self,
@@ -311,6 +303,11 @@ class Qwen3XMLToolParser(ToolParser):
311
  delta_token_ids: Sequence[int],
312
  request: ChatCompletionRequest,
313
  ) -> Union[DeltaMessage, None]:
 
 
 
 
 
314
  # If no delta text, return None unless it's an EOS token after tool calls
315
  if not delta_text:
316
  # Check if this is an EOS token after all tool calls are complete
@@ -319,15 +316,14 @@ class Qwen3XMLToolParser(ToolParser):
319
  if delta_token_ids and self.tool_call_end_token_id not in delta_token_ids:
320
  # Count complete tool calls
321
  complete_calls = len(
322
- self.tool_call_complete_regex.findall(current_text)
323
- )
324
 
325
  # If we have completed tool calls and populated prev_tool_call_arr
326
  if complete_calls > 0 and len(self.prev_tool_call_arr) > 0:
327
  # Check if all tool calls are closed
328
  open_calls = current_text.count(
329
- self.tool_call_start_token
330
- ) - current_text.count(self.tool_call_end_token)
331
  if open_calls == 0:
332
  # Return empty delta message to allow finish_reason processing
333
  return DeltaMessage(content="")
@@ -336,10 +332,6 @@ class Qwen3XMLToolParser(ToolParser):
336
  return DeltaMessage(content="")
337
  return None
338
 
339
- # Check if this is the first call (reset state if needed)
340
- if not previous_text:
341
- self._reset_streaming_state()
342
-
343
  # Update accumulated text
344
  self.accumulated_text = current_text
345
 
@@ -354,6 +346,7 @@ class Qwen3XMLToolParser(ToolParser):
354
  self.param_count = 0
355
  self.json_started = False
356
  self.json_closed = False
 
357
 
358
  # Check if there are more tool calls
359
  tool_starts = current_text.count(self.tool_call_start_token)
@@ -366,16 +359,12 @@ class Qwen3XMLToolParser(ToolParser):
366
  # Handle normal content before tool calls
367
  if not self.is_tool_call_started:
368
  # Check if tool call is starting
369
- if (
370
- self.tool_call_start_token_id in delta_token_ids
371
- or self.tool_call_start_token in delta_text
372
- ):
373
  self.is_tool_call_started = True
374
  # Return any content before the tool call
375
  if self.tool_call_start_token in delta_text:
376
- content_before = delta_text[
377
- : delta_text.index(self.tool_call_start_token)
378
- ]
379
  if content_before:
380
  return DeltaMessage(content=content_before)
381
  return None
@@ -412,20 +401,19 @@ class Qwen3XMLToolParser(ToolParser):
412
 
413
  tool_start_idx = tool_starts[self.current_tool_index]
414
  # Find where this tool call ends (or current position if not ended yet)
415
- tool_end_idx = current_text.find(self.tool_call_end_token, tool_start_idx)
 
416
  if tool_end_idx == -1:
417
  tool_text = current_text[tool_start_idx:]
418
  else:
419
- tool_text = current_text[
420
- tool_start_idx : tool_end_idx + len(self.tool_call_end_token)
421
- ]
422
 
423
  # Looking for function header
424
  if not self.header_sent:
425
  if self.tool_call_prefix in tool_text:
426
  func_start = tool_text.find(self.tool_call_prefix) + len(
427
- self.tool_call_prefix
428
- )
429
  func_end = tool_text.find(">", func_start)
430
 
431
  if func_end != -1:
@@ -439,44 +427,37 @@ class Qwen3XMLToolParser(ToolParser):
439
  # This ensures finish_reason="tool_calls" even if parsing isn't complete
440
  already_added = any(
441
  tool.get("name") == self.current_function_name
442
- for tool in self.prev_tool_call_arr
443
- )
444
  if not already_added:
445
- self.prev_tool_call_arr.append(
446
- {
447
- "name": self.current_function_name,
448
- "arguments": "{}", # Placeholder, will be updated later
449
- }
450
- )
451
 
452
  # Send header with function info
453
- return DeltaMessage(
454
- tool_calls=[
455
- DeltaToolCall(
456
- index=self.current_tool_index,
457
- id=self.current_tool_id,
458
- function=DeltaFunctionCall(
459
- name=self.current_function_name, arguments=""
460
- ),
461
- type="function",
462
- )
463
- ]
464
- )
465
  return None
466
 
467
  # We've sent header, now handle function body
468
  if self.in_function:
469
  # Send opening brace if not sent yet
470
- if not self.json_started and not self.parameter_prefix in delta_text:
471
  self.json_started = True
472
- return DeltaMessage(
473
- tool_calls=[
474
- DeltaToolCall(
475
- index=self.current_tool_index,
476
- function=DeltaFunctionCall(arguments="{"),
477
- )
478
- ]
479
- )
480
 
481
  # Make sure json_started is set if we're processing parameters
482
  if not self.json_started:
@@ -490,58 +471,54 @@ class Qwen3XMLToolParser(ToolParser):
490
  # Extract the complete tool call to update prev_tool_call_arr with final arguments
491
  # Find the function content
492
  func_start = tool_text.find(self.tool_call_prefix) + len(
493
- self.tool_call_prefix
494
- )
495
- func_content_end = tool_text.find(self.function_end_token, func_start)
496
  if func_content_end != -1:
497
  func_content = tool_text[func_start:func_content_end]
498
  # Parse to get the complete arguments
499
  try:
500
  parsed_tool = self._parse_xml_function_call(
501
- func_content, request.tools if request else None
502
- )
503
  if parsed_tool:
504
  # Update existing entry in prev_tool_call_arr with complete arguments
505
  for i, tool in enumerate(self.prev_tool_call_arr):
506
- if tool.get("name") == parsed_tool.function.name:
507
- self.prev_tool_call_arr[i]["arguments"] = (
508
- parsed_tool.function.arguments
509
- )
510
  break
511
  except Exception:
512
  pass # Ignore parsing errors during streaming
513
 
514
- result = DeltaMessage(
515
- tool_calls=[
516
- DeltaToolCall(
517
- index=self.current_tool_index,
518
- function=DeltaFunctionCall(arguments="}"),
519
- )
520
- ]
521
- )
522
 
523
  # Reset state for next tool
524
  self.in_function = False
525
  self.json_closed = True
 
526
 
527
  return result
528
 
529
  # Look for parameters
530
- # Count how many complete parameters we have processed
531
- complete_params = tool_text.count(self.parameter_end_token)
 
 
 
 
 
 
 
532
 
533
  # Check if we should start a new parameter
534
- if not self.in_param and self.param_count < complete_params:
535
- # Find the unprocessed parameter
536
- # Count parameter starts
537
- param_starts = []
538
- idx = 0
539
- while True:
540
- idx = tool_text.find(self.parameter_prefix, idx)
541
- if idx == -1:
542
- break
543
- param_starts.append(idx)
544
- idx += len(self.parameter_prefix)
545
 
546
  if len(param_starts) > self.param_count:
547
  # Process the next parameter
@@ -561,45 +538,74 @@ class Qwen3XMLToolParser(ToolParser):
561
  value_text = value_text[1:]
562
 
563
  # Find where this parameter ends
564
- param_end_idx = value_text.find(self.parameter_end_token)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
565
  if param_end_idx != -1:
566
  # Complete parameter found
567
  param_value = value_text[:param_end_idx]
568
  if param_value.endswith("\n"):
569
  param_value = param_value[:-1]
570
 
571
- # Build complete JSON fragment for this parameter
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
572
  if self.param_count == 0:
573
- json_fragment = (
574
- '"'
575
- + self.current_param_name
576
- + '": "'
577
- + json.dumps(param_value)[1:-1]
578
- + '"'
579
- )
580
  else:
581
- json_fragment = (
582
- ', "'
583
- + self.current_param_name
584
- + '": "'
585
- + json.dumps(param_value)[1:-1]
586
- + '"'
587
- )
588
 
589
  self.param_count += 1
590
 
591
- return DeltaMessage(
592
- tool_calls=[
593
- DeltaToolCall(
594
- index=self.current_tool_index,
595
- function=DeltaFunctionCall(
596
- arguments=json_fragment
597
- ),
598
- )
599
- ]
600
- )
601
-
602
- # Continue parameter value
603
  if self.in_param:
604
  if self.parameter_end_token in delta_text:
605
  # End of parameter
@@ -609,34 +615,45 @@ class Qwen3XMLToolParser(ToolParser):
609
  # Skip past > if at start
610
  if not self.current_param_value and ">" in value_chunk:
611
  gt_idx = value_chunk.find(">")
612
- value_chunk = value_chunk[gt_idx + 1 :]
613
 
614
- if not self.current_param_value and value_chunk.startswith("\n"):
 
615
  value_chunk = value_chunk[1:]
616
 
617
- # Calculate incremental JSON
618
  full_value = self.current_param_value + value_chunk
619
- prev_escaped = (
620
- json.dumps(self.current_param_value)[1:-1]
621
- if self.current_param_value
622
- else ""
623
- )
624
- full_escaped = json.dumps(full_value)[1:-1]
625
- delta_escaped = full_escaped[len(prev_escaped) :]
626
-
 
 
 
 
 
 
 
 
 
 
 
 
627
  self.in_param = False
628
  self.current_param_value = ""
629
 
630
- return DeltaMessage(
631
- tool_calls=[
632
- DeltaToolCall(
633
- index=self.current_tool_index,
634
- function=DeltaFunctionCall(
635
- arguments=delta_escaped + '"'
636
- ),
637
- )
638
- ]
639
- )
640
  else:
641
  # Continue accumulating value
642
  value_chunk = delta_text
@@ -644,32 +661,29 @@ class Qwen3XMLToolParser(ToolParser):
644
  # Handle first chunk after param name
645
  if not self.current_param_value and ">" in value_chunk:
646
  gt_idx = value_chunk.find(">")
647
- value_chunk = value_chunk[gt_idx + 1 :]
648
 
649
- if not self.current_param_value and value_chunk.startswith("\n"):
 
650
  value_chunk = value_chunk[1:]
651
 
652
  if value_chunk:
653
  # Stream the escaped delta
654
- prev_escaped = (
655
- json.dumps(self.current_param_value)[1:-1]
656
- if self.current_param_value
657
- else ""
658
- )
659
  self.current_param_value += value_chunk
660
- full_escaped = json.dumps(self.current_param_value)[1:-1]
661
- delta_escaped = full_escaped[len(prev_escaped) :]
 
662
 
663
  if delta_escaped:
664
- return DeltaMessage(
665
- tool_calls=[
666
- DeltaToolCall(
667
- index=self.current_tool_index,
668
- function=DeltaFunctionCall(
669
- arguments=delta_escaped
670
- ),
671
- )
672
- ]
673
- )
674
 
675
  return None
 
 # SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import ast
 import json
 import uuid
 from collections.abc import Sequence
+from typing import Any, List, Optional, Union
+
+import regex as re
+
+from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
+                                              ChatCompletionToolsParam,
+                                              DeltaFunctionCall, DeltaMessage,
+                                              DeltaToolCall,
+                                              ExtractedToolCallInformation,
+                                              FunctionCall, ToolCall)
 from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import (
+    ToolParser, ToolParserManager)
 from vllm.logger import init_logger
 from vllm.transformers_utils.tokenizer import AnyTokenizer
 
 logger = init_logger(__name__)
 
 
+@ToolParserManager.register_module("qwen3_coder")
+class Qwen3CoderToolParser(ToolParser):
+
     def __init__(self, tokenizer: AnyTokenizer):
         super().__init__(tokenizer)
 
         # Regex patterns
         self.tool_call_complete_regex = re.compile(
+            r"<tool_call>(.*?)</tool_call>", re.DOTALL)
         self.tool_call_regex = re.compile(
+            r"<tool_call>(.*?)</tool_call>|<tool_call>(.*?)$", re.DOTALL)
         self.tool_call_function_regex = re.compile(
+            r"<function=(.*?)</function>|<function=(.*)$", re.DOTALL)
         self.tool_call_parameter_regex = re.compile(
+            r"<parameter=(.*?)(?:</parameter>|(?=<parameter=)|(?=</function>)|$)",
+            re.DOTALL)
 
         if not self.model_tokenizer:
             raise ValueError(
                 "The model tokenizer must be passed to the ToolParser "
+                "constructor during construction.")
 
+        self.tool_call_start_token_id = self.vocab.get(
+            self.tool_call_start_token)
         self.tool_call_end_token_id = self.vocab.get(self.tool_call_end_token)
 
         if self.tool_call_start_token_id is None or self.tool_call_end_token_id is None:
             raise RuntimeError(
                 "Qwen3 XML Tool parser could not locate tool call start/end "
+                "tokens in the tokenizer!")
 
+        logger.info(
+            f"vLLM Successfully import tool parser {self.__class__.__name__} !"
+        )
 
     def _generate_tool_call_id(self) -> str:
         """Generate a unique tool call ID."""
 
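For orientation, and not part of the diff, a minimal sketch of the text shape these patterns target. The get_weather call is invented for illustration; note that the reworked parameter pattern also accepts a parameter block whose closing </parameter> tag is missing:

```python
import regex as re  # same third-party regex module the parser imports

parameter_re = re.compile(
    r"<parameter=(.*?)(?:</parameter>|(?=<parameter=)|(?=</function>)|$)",
    re.DOTALL)

sample = ("<tool_call>\n<function=get_weather>\n"
          "<parameter=city>\nParis\n</parameter>\n"
          "<parameter=unit>\ncelsius\n"   # no closing </parameter> tag
          "</function>\n</tool_call>")

print(parameter_re.findall(sample))
# ['city>\nParis\n', 'unit>\ncelsius\n']
```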
         self.accumulated_text = ""
         self.json_started = False
         self.json_closed = False
+        # Store accumulated parameters for type conversion
+        self.accumulated_params = {}
+        self.streaming_request = None
+
+    def _get_arguments_config(
+            self, func_name: str,
+            tools: Optional[list[ChatCompletionToolsParam]]) -> dict:
+        """Extract argument configuration for a function."""
+        if tools is None:
             return {}
+        for config in tools:
+            if not hasattr(config, "type") or not (hasattr(
+                    config, "function") and hasattr(config.function, "name")):
+                continue
+            if config.type == "function" and config.function.name == func_name:
+                if not hasattr(config.function, "parameters"):
+                    return {}
+                params = config.function.parameters
+                if isinstance(params, dict) and "properties" in params:
+                    return params["properties"]
+                elif isinstance(params, dict):
+                    return params
+                else:
+                    return {}
+        logger.warning(f"Tool '{func_name}' is not defined in the tools list.")
+        return {}
+
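A small illustration, with a hypothetical tool definition not taken from the repo, of what _get_arguments_config resolves for a request:

```python
# Hypothetical OpenAI-style tool definition carried in the request.
tool = {
    "type": "function",
    "function": {
        "name": "get_weather",
        "parameters": {
            "type": "object",
            "properties": {
                "city": {"type": "string"},
                "days": {"type": "integer"},
            },
        },
    },
}
# _get_arguments_config("get_weather", request.tools) would return the
# properties mapping: {"city": {"type": "string"}, "days": {"type": "integer"}}
```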
+    def _convert_param_value(self, param_value: str, param_name: str,
+                             param_config: dict, func_name: str) -> Any:
+        """Convert parameter value based on its type in the schema."""
+        # Handle null value for any type
+        if param_value.lower() == "null":
+            return None
 
+        if param_name not in param_config:
+            if param_config != {}:
+                logger.warning(
+                    f"Parsed parameter '{param_name}' is not defined in the tool "
+                    f"parameters for tool '{func_name}', directly returning the string value."
+                )
+            return param_value
 
+        if isinstance(param_config[param_name],
+                      dict) and "type" in param_config[param_name]:
+            param_type = str(param_config[param_name]["type"]).strip().lower()
+        else:
+            param_type = "string"
+        if param_type in ["string", "str", "text", "varchar", "char", "enum"]:
+            return param_value
+        elif param_type.startswith("int") or param_type.startswith(
+                "uint") or param_type.startswith(
+                    "long") or param_type.startswith(
+                        "short") or param_type.startswith("unsigned"):
+            try:
+                param_value = int(param_value)
+            except:
+                logger.warning(
+                    f"Parsed value '{param_value}' of parameter '{param_name}' is not an integer in tool "
+                    f"'{func_name}', degenerating to string.")
+            return param_value
+        elif param_type.startswith("num") or param_type.startswith("float"):
+            try:
+                float_param_value = float(param_value)
+                param_value = float_param_value if float_param_value - int(
+                    float_param_value) != 0 else int(float_param_value)
+            except:
+                logger.warning(
+                    f"Parsed value '{param_value}' of parameter '{param_name}' is not a float in tool "
+                    f"'{func_name}', degenerating to string.")
+            return param_value
+        elif param_type in ["boolean", "bool", "binary"]:
+            param_value = param_value.lower()
+            if param_value not in ["true", "false"]:
+                logger.warning(
+                    f"Parsed value '{param_value}' of parameter '{param_name}' is not a boolean (`true` of `false`) in tool '{func_name}', degenerating to false."
+                )
+            return param_value == "true"
+        else:
+            if param_type in ["object", "array", "arr"
+                              ] or param_type.startswith(
+                                  "dict") or param_type.startswith("list"):
                try:
+                    param_value = json.loads(param_value)
+                    return param_value
                except:
                    logger.warning(
+                        f"Parsed value '{param_value}' of parameter '{param_name}' cannot be parsed with json.loads in tool "
+                        f"'{func_name}', will try other methods to parse it.")
+            try:
+                param_value = ast.literal_eval(param_value)  # safer
+            except:
+                logger.warning(
+                    f"Parsed value '{param_value}' of parameter '{param_name}' cannot be converted via Python `ast.literal_eval()` in tool '{func_name}', degenerating to string."
+                )
+            return param_value
+
193
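A few hedged examples of how this conversion behaves; the schema and the parser instance below are hypothetical, used only to show the mapping from raw XML parameter strings to JSON types:

```python
schema = {"days": {"type": "integer"},
          "metric": {"type": "boolean"},
          "extra": {"type": "object"}}

# parser._convert_param_value("3", "days", schema, "get_weather")         -> 3
# parser._convert_param_value("true", "metric", schema, "get_weather")    -> True
# parser._convert_param_value('{"a": 1}', "extra", schema, "get_weather") -> {"a": 1}
# parser._convert_param_value("null", "days", schema, "get_weather")      -> None
# Unknown parameter names or unparsable values fall back to the raw string.
```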
+    def _parse_xml_function_call(
+            self, function_call_str: str,
+            tools: Optional[list[ChatCompletionToolsParam]]
+    ) -> Optional[ToolCall]:
 
         # Extract function name
         end_index = function_call_str.index(">")
         function_name = function_call_str[:end_index]
+        param_config = self._get_arguments_config(function_name, tools)
+        parameters = function_call_str[end_index + 1:]
         param_dict = {}
+        for match_text in self.tool_call_parameter_regex.findall(parameters):
             idx = match_text.index(">")
             param_name = match_text[:idx]
+            param_value = str(match_text[idx + 1:])
             # Remove prefix and trailing \n
             if param_value.startswith("\n"):
                 param_value = param_value[1:]
             if param_value.endswith("\n"):
                 param_value = param_value[:-1]
 
+            param_dict[param_name] = self._convert_param_value(
+                param_value, param_name, param_config, function_name)
         return ToolCall(
             type="function",
+            function=FunctionCall(name=function_name,
+                                  arguments=json.dumps(param_dict,
+                                                       ensure_ascii=False)),
         )
222
 
     def _get_function_calls(self, model_output: str) -> List[str]:
 
         raw_function_calls = []
         for tool_call in raw_tool_calls:
+            raw_function_calls.extend(
+                self.tool_call_function_regex.findall(tool_call))
 
         function_calls = [
             match[0] if match[0] else match[1] for match in raw_function_calls
 
     ) -> ExtractedToolCallInformation:
         # Quick check to avoid unnecessary processing
         if self.tool_call_prefix not in model_output:
+            return ExtractedToolCallInformation(tools_called=False,
+                                                tool_calls=[],
+                                                content=model_output)
 
         try:
             function_calls = self._get_function_calls(model_output)
             if len(function_calls) == 0:
+                return ExtractedToolCallInformation(tools_called=False,
+                                                    tool_calls=[],
+                                                    content=model_output)
 
             tool_calls = [
                 self._parse_xml_function_call(function_call_str, request.tools)
 
             self.prev_tool_call_arr.clear()  # Clear previous calls
             for tool_call in tool_calls:
                 if tool_call:
+                    self.prev_tool_call_arr.append({
+                        "name":
+                        tool_call.function.name,
+                        "arguments":
+                        tool_call.function.arguments,
+                    })
 
             # Extract content before tool calls
             content_index = model_output.find(self.tool_call_start_token)
+            content_index = content_index if content_index >= 0 else model_output.find(
+                self.tool_call_prefix)
             content = model_output[:content_index]  # .rstrip()
 
             return ExtractedToolCallInformation(
 
         except Exception:
             logger.exception("Error in extracting tool call from response.")
+            return ExtractedToolCallInformation(tools_called=False,
+                                                tool_calls=[],
+                                                content=model_output)
295
 
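Putting the non-streaming path together, a rough usage sketch; it assumes vLLM and the Qwen3-Coder tokenizer are available locally and that this module is importable, and the completion text and tool schema are invented:

```python
from transformers import AutoTokenizer
from vllm.entrypoints.openai.protocol import ChatCompletionRequest

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-Coder-480B-A35B-Instruct")
parser = Qwen3CoderToolParser(tokenizer)

request = ChatCompletionRequest(
    model="qwen3-coder",
    messages=[{"role": "user", "content": "Weather in Paris?"}],
    tools=[{"type": "function",
            "function": {"name": "get_weather",
                         "parameters": {"type": "object",
                                        "properties": {"city": {"type": "string"}}}}}],
)

output = ("Sure, checking.\n<tool_call>\n<function=get_weather>\n"
          "<parameter=city>\nParis\n</parameter>\n</function>\n</tool_call>")

info = parser.extract_tool_calls(output, request)
# info.tools_called                      -> True
# info.content                           -> "Sure, checking.\n"
# info.tool_calls[0].function.name       -> "get_weather"
# info.tool_calls[0].function.arguments  -> '{"city": "Paris"}'
```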
     def extract_tool_calls_streaming(
         self,
 
         delta_token_ids: Sequence[int],
         request: ChatCompletionRequest,
     ) -> Union[DeltaMessage, None]:
+        # Store request for type conversion
+        if not previous_text:
+            self._reset_streaming_state()
+        self.streaming_request = request
+
         # If no delta text, return None unless it's an EOS token after tool calls
         if not delta_text:
             # Check if this is an EOS token after all tool calls are complete
 
             if delta_token_ids and self.tool_call_end_token_id not in delta_token_ids:
                 # Count complete tool calls
                 complete_calls = len(
+                    self.tool_call_complete_regex.findall(current_text))
 
                 # If we have completed tool calls and populated prev_tool_call_arr
                 if complete_calls > 0 and len(self.prev_tool_call_arr) > 0:
                     # Check if all tool calls are closed
                     open_calls = current_text.count(
+                        self.tool_call_start_token) - current_text.count(
+                            self.tool_call_end_token)
                     if open_calls == 0:
                         # Return empty delta message to allow finish_reason processing
                         return DeltaMessage(content="")
 
                 return DeltaMessage(content="")
             return None
 
         # Update accumulated text
         self.accumulated_text = current_text
 
 
346
  self.param_count = 0
347
  self.json_started = False
348
  self.json_closed = False
349
+ self.accumulated_params = {}
350
 
351
  # Check if there are more tool calls
352
  tool_starts = current_text.count(self.tool_call_start_token)
 
359
  # Handle normal content before tool calls
360
  if not self.is_tool_call_started:
361
  # Check if tool call is starting
362
+ if self.tool_call_start_token_id in delta_token_ids or self.tool_call_start_token in delta_text:
 
 
 
363
  self.is_tool_call_started = True
364
  # Return any content before the tool call
365
  if self.tool_call_start_token in delta_text:
366
+ content_before = delta_text[:delta_text.index(
367
+ self.tool_call_start_token)]
 
368
  if content_before:
369
  return DeltaMessage(content=content_before)
370
  return None
 
401
 
402
  tool_start_idx = tool_starts[self.current_tool_index]
403
  # Find where this tool call ends (or current position if not ended yet)
404
+ tool_end_idx = current_text.find(self.tool_call_end_token,
405
+ tool_start_idx)
406
  if tool_end_idx == -1:
407
  tool_text = current_text[tool_start_idx:]
408
  else:
409
+ tool_text = current_text[tool_start_idx:tool_end_idx +
410
+ len(self.tool_call_end_token)]
 
411
 
412
  # Looking for function header
413
  if not self.header_sent:
414
  if self.tool_call_prefix in tool_text:
415
  func_start = tool_text.find(self.tool_call_prefix) + len(
416
+ self.tool_call_prefix)
 
417
  func_end = tool_text.find(">", func_start)
418
 
419
  if func_end != -1:
 
427
  # This ensures finish_reason="tool_calls" even if parsing isn't complete
428
  already_added = any(
429
  tool.get("name") == self.current_function_name
430
+ for tool in self.prev_tool_call_arr)
 
431
  if not already_added:
432
+ self.prev_tool_call_arr.append({
433
+ "name": self.current_function_name,
434
+ "arguments":
435
+ "{}", # Placeholder, will be updated later
436
+ })
 
437
 
438
  # Send header with function info
439
+ return DeltaMessage(tool_calls=[
440
+ DeltaToolCall(
441
+ index=self.current_tool_index,
442
+ id=self.current_tool_id,
443
+ function=DeltaFunctionCall(
444
+ name=self.current_function_name, arguments=""),
445
+ type="function",
446
+ )
447
+ ])
 
 
 
448
  return None
449
 
450
  # We've sent header, now handle function body
451
  if self.in_function:
452
  # Send opening brace if not sent yet
453
+ if not self.json_started and self.parameter_prefix not in delta_text:
454
  self.json_started = True
455
+ return DeltaMessage(tool_calls=[
456
+ DeltaToolCall(
457
+ index=self.current_tool_index,
458
+ function=DeltaFunctionCall(arguments="{"),
459
+ )
460
+ ])
 
 
461
 
462
  # Make sure json_started is set if we're processing parameters
463
  if not self.json_started:
 
471
  # Extract the complete tool call to update prev_tool_call_arr with final arguments
472
  # Find the function content
473
  func_start = tool_text.find(self.tool_call_prefix) + len(
474
+ self.tool_call_prefix)
475
+ func_content_end = tool_text.find(self.function_end_token,
476
+ func_start)
477
  if func_content_end != -1:
478
  func_content = tool_text[func_start:func_content_end]
479
  # Parse to get the complete arguments
480
  try:
481
  parsed_tool = self._parse_xml_function_call(
482
+ func_content, self.streaming_request.tools
483
+ if self.streaming_request else None)
484
  if parsed_tool:
485
  # Update existing entry in prev_tool_call_arr with complete arguments
486
  for i, tool in enumerate(self.prev_tool_call_arr):
487
+ if tool.get(
488
+ "name") == parsed_tool.function.name:
489
+ self.prev_tool_call_arr[i][
490
+ "arguments"] = parsed_tool.function.arguments
491
  break
492
  except Exception:
493
  pass # Ignore parsing errors during streaming
494
 
495
+ result = DeltaMessage(tool_calls=[
496
+ DeltaToolCall(
497
+ index=self.current_tool_index,
498
+ function=DeltaFunctionCall(arguments="}"),
499
+ )
500
+ ])
 
 
501
 
502
  # Reset state for next tool
503
  self.in_function = False
504
  self.json_closed = True
505
+ self.accumulated_params = {}
506
 
507
  return result
508
 
509
  # Look for parameters
510
+ # Find all parameter starts
511
+ param_starts = []
512
+ idx = 0
513
+ while True:
514
+ idx = tool_text.find(self.parameter_prefix, idx)
515
+ if idx == -1:
516
+ break
517
+ param_starts.append(idx)
518
+ idx += len(self.parameter_prefix)
519
 
520
  # Check if we should start a new parameter
521
+ if not self.in_param and self.param_count < len(param_starts):
 
 
 
 
 
 
 
 
 
 
522
 
523
  if len(param_starts) > self.param_count:
524
  # Process the next parameter
 
538
  value_text = value_text[1:]
539
 
540
  # Find where this parameter ends
541
+ param_end_idx = value_text.find(
542
+ self.parameter_end_token)
543
+ if param_end_idx == -1:
544
+ # No closing tag, look for next parameter or function end
545
+ next_param_idx = value_text.find(
546
+ self.parameter_prefix)
547
+ func_end_idx = value_text.find(
548
+ self.function_end_token)
549
+
550
+ if next_param_idx != -1 and (func_end_idx == -1
551
+ or next_param_idx
552
+ < func_end_idx):
553
+ param_end_idx = next_param_idx
554
+ elif func_end_idx != -1:
555
+ param_end_idx = func_end_idx
556
+ else:
557
+ # Neither found, check if tool call is complete
558
+ if self.tool_call_end_token in tool_text:
559
+ # Tool call is complete, so parameter must be complete too
560
+ # Use all remaining text before function end as value
561
+ param_end_idx = len(value_text)
562
+ else:
563
+ # Still streaming, wait for more content
564
+ return None
565
+
566
  if param_end_idx != -1:
567
  # Complete parameter found
568
  param_value = value_text[:param_end_idx]
569
  if param_value.endswith("\n"):
570
  param_value = param_value[:-1]
571
 
572
+ # Store raw value for later processing
573
+ self.accumulated_params[
574
+ self.current_param_name] = param_value
575
+
576
+ # Get parameter configuration for type conversion
577
+ param_config = self._get_arguments_config(
578
+ self.current_function_name,
579
+ self.streaming_request.tools
580
+ if self.streaming_request else None)
581
+
582
+ # Convert the parameter value to the appropriate type
583
+ converted_value = self._convert_param_value(
584
+ param_value, self.current_param_name,
585
+ param_config, self.current_function_name)
586
+
587
+ # Build JSON fragment based on the converted type
588
+ # Use json.dumps to properly serialize the value
589
+ serialized_value = json.dumps(converted_value,
590
+ ensure_ascii=False)
591
+
592
  if self.param_count == 0:
593
+ json_fragment = f'"{self.current_param_name}": {serialized_value}'
 
 
 
 
 
 
594
  else:
595
+ json_fragment = f', "{self.current_param_name}": {serialized_value}'
 
 
 
 
 
 
596
 
597
  self.param_count += 1
598
 
599
+ return DeltaMessage(tool_calls=[
600
+ DeltaToolCall(
601
+ index=self.current_tool_index,
602
+ function=DeltaFunctionCall(
603
+ arguments=json_fragment),
604
+ )
605
+ ])
606
+
607
+ # Continue parameter value - Not used in the current implementation
608
+ # since we process complete parameters above
 
 
609
  if self.in_param:
610
  if self.parameter_end_token in delta_text:
611
  # End of parameter
 
615
  # Skip past > if at start
616
  if not self.current_param_value and ">" in value_chunk:
617
  gt_idx = value_chunk.find(">")
618
+ value_chunk = value_chunk[gt_idx + 1:]
619
 
620
+ if not self.current_param_value and value_chunk.startswith(
621
+ "\n"):
622
  value_chunk = value_chunk[1:]
623
 
624
+ # Store complete value
625
  full_value = self.current_param_value + value_chunk
626
+ self.accumulated_params[
627
+ self.current_param_name] = full_value
628
+
629
+ # Get parameter configuration for type conversion
630
+ param_config = self._get_arguments_config(
631
+ self.current_function_name,
632
+ self.streaming_request.tools
633
+ if self.streaming_request else None)
634
+
635
+ # Convert the parameter value to the appropriate type
636
+ converted_value = self._convert_param_value(
637
+ full_value, self.current_param_name, param_config,
638
+ self.current_function_name)
639
+
640
+ # Serialize the converted value
641
+ serialized_value = json.dumps(converted_value,
642
+ ensure_ascii=False)
643
+
644
+ # Since we've been streaming the quoted version, we need to close it properly
645
+ # This is complex - for now just complete the value
646
  self.in_param = False
647
  self.current_param_value = ""
648
 
649
+ # Just close the current parameter string
650
+ return DeltaMessage(tool_calls=[
651
+ DeltaToolCall(
652
+ index=self.current_tool_index,
653
+ function=DeltaFunctionCall(
654
+ arguments='"'), # Close the string quote
655
+ )
656
+ ])
 
 
657
  else:
658
  # Continue accumulating value
659
  value_chunk = delta_text
 
661
  # Handle first chunk after param name
662
  if not self.current_param_value and ">" in value_chunk:
663
  gt_idx = value_chunk.find(">")
664
+ value_chunk = value_chunk[gt_idx + 1:]
665
 
666
+ if not self.current_param_value and value_chunk.startswith(
667
+ "\n"):
668
  value_chunk = value_chunk[1:]
669
 
670
  if value_chunk:
671
  # Stream the escaped delta
672
+ prev_escaped = json.dumps(
673
+ self.current_param_value, ensure_ascii=False
674
+ )[1:-1] if self.current_param_value else ""
 
 
675
  self.current_param_value += value_chunk
676
+ full_escaped = json.dumps(self.current_param_value,
677
+ ensure_ascii=False)[1:-1]
678
+ delta_escaped = full_escaped[len(prev_escaped):]
679
 
680
  if delta_escaped:
681
+ return DeltaMessage(tool_calls=[
682
+ DeltaToolCall(
683
+ index=self.current_tool_index,
684
+ function=DeltaFunctionCall(
685
+ arguments=delta_escaped),
686
+ )
687
+ ])
 
 
 
688
 
689
  return None
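Taken end to end, the streaming path emits a tool call's arguments as a sequence of DeltaFunctionCall fragments that concatenate to the same JSON the non-streaming path produces. A minimal illustration for the invented get_weather call used above:

```python
# Opening brace, one typed key/value fragment per completed parameter, closing brace.
fragments = ['{', '"city": "Paris"', '}']
assert "".join(fragments) == '{"city": "Paris"}'
```

When serving, the parser registers itself as "qwen3_coder"; with a recent vLLM build it would typically be enabled through the --enable-auto-tool-choice, --tool-call-parser qwen3_coder and --tool-parser-plugin qwen3coder_tool_parser.py server options, though flag availability depends on the vLLM version in use.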