{
"model_config": {
"name": "HunyuanDense",
"model_path": "Base Model Path",
"trust_remote_code": true,
"torch_dtype": "auto",
"device_map": "auto",
"low_cpu_mem_usage": true,
"use_cache": false,
"cache_dir": null
},
"compression_config": {
"name": "PTQ",
"quantization": {
"name": "fp8_static",
"bits": 8,
"quant_method": {
"weight": "per-tensor",
"activation": "per-tensor"
},
"quant_helpers": [],
"smooth_alpha": 0.5,
"low_memory": false,
"modules_to_quantize": [],
"zero_point": true,
"mse_range": false,
"ignore_layers": [
"lm_head",
"model.embed_tokens"
],
"quant_analyse": false,
"quant_vit": false
},
"cache": null
},
"dataset_config": {
"name": "TextDataset",
"data_path": "/cfs_cloud_code/rubingyang/data/hy_7b_translate/quant_mix512.jsonl",
"max_seq_length": 4096,
"num_samples": 512,
"batch_size": 1,
"shuffle": false,
"inference_settings": null
},
"global_config": {
"save_path": "Save Model Path",
"max_seq_length": 4096,
"hidden_size": 4096,
"model_arch_type": "hunyuan_v1_dense",
"deploy_backend": "vllm"
},
"infer_config": null,
"debug_info": {
"python": "3.12.11 (main, Jun 4 2025, 08:56:18) [GCC 11.4.0]",
"angelslim": {
"name": "angelslim",
"version": "0.1.1",
"source": "pip"
},
"torch": {
"name": "torch",
"version": "2.7.1+cu128",
"source": "pip"
},
"transformers": {
"name": "transformers",
"version": "4.55.0.dev0",
"source": "pip"
},
"torch_cuda_version": "12.8"
}
}