Intel
/

phi-2-int4-inc

@@ -22,10 +22,10 @@
   "quantization_config": {
     "amp": true,
     "autoround_version": "0.3.1.dev",
     "bits": 4,
-    "damp_percent": 0.01,
     "data_type": "int",
-    "desc_act": false,
     "enable_minmax_tuning": true,
     "enable_norm_bias_tuning": false,
     "enable_quanted_input": true,
@@ -37,12 +37,11 @@
     "minmax_lr": 0.001,
     "nsamples": 512,
     "quant_block_list": null,
-    "quant_method": "gptq",
     "scale_dtype": "torch.float16",
     "seqlen": 2048,
     "sym": true,
-    "train_bs": 8,
-    "true_sequential": false
   },
   "resid_pdrop": 0.1,
   "rope_scaling": null,

   "quantization_config": {
     "amp": true,
     "autoround_version": "0.3.1.dev",
+    "backend": "auto_round:gptq:exllamav2",
     "bits": 4,
     "data_type": "int",
+    "dataset": "NeelNanda/pile-10k",
     "enable_minmax_tuning": true,
     "enable_norm_bias_tuning": false,
     "enable_quanted_input": true,
     "minmax_lr": 0.001,
     "nsamples": 512,
     "quant_block_list": null,
+    "quant_method": "intel/auto-round",
     "scale_dtype": "torch.float16",
     "seqlen": 2048,
     "sym": true,
+    "train_bs": 8
   },
   "resid_pdrop": 0.1,
   "rope_scaling": null,

quantize_config.json → quantization_config.json RENAMED Viewed

@@ -17,9 +17,8 @@
   "low_gpu_mem_usage": false,
   "quant_block_list": null,
   "enable_norm_bias_tuning": false,
   "autoround_version": "0.3.1.dev",
-  "quant_method": "gptq",
-  "desc_act": false,
-  "true_sequential": false,
-  "damp_percent": 0.01
 }

   "low_gpu_mem_usage": false,
   "quant_block_list": null,
   "enable_norm_bias_tuning": false,
+  "dataset": "NeelNanda/pile-10k",
   "autoround_version": "0.3.1.dev",
+  "quant_method": "intel/auto-round",
+  "backend": "auto_round:gptq:exllamav2"
 }