{
  "model": "Qwen/Qwen3-30B-A3B",
  "model_type": "qwen3_moe",
  "model_revision": null,
  "task_type": "causal_lm",
  "torch_dtype": "bfloat16",
  "attn_impl": null,
  "num_labels": null,
  "problem_type": null,
  "rope_scaling": null,
  "device_map": null,
  "max_memory": {},
  "local_repo_path": null,
  "template": "qwen3",
  "system": null,
  "max_length": 2048,
  "truncation_strategy": "delete",
  "max_pixels": null,
  "agent_template": null,
  "norm_bbox": null,
  "response_prefix": null,
  "padding_side": "right",
  "loss_scale": "default",
  "sequence_parallel_size": 1,
  "use_chat_template": true,
  "template_backend": "swift",
  "dataset": [],
  "val_dataset": [],
  "split_dataset_ratio": 0.01,
  "data_seed": 42,
  "dataset_num_proc": 1,
  "dataset_shuffle": true,
  "val_dataset_shuffle": false,
  "streaming": false,
  "interleave_prob": null,
  "stopping_strategy": "first_exhausted",
  "shuffle_buffer_size": 1000,
  "enable_cache": false,
  "download_mode": "reuse_dataset_if_exists",
  "columns": {},
  "strict": false,
  "remove_unused_columns": true,
  "model_name": [
    null,
    null
  ],
  "model_author": [
    null,
    null
  ],
  "custom_dataset_info": [],
  "quant_method": null,
  "quant_bits": null,
  "hqq_axis": null,
  "bnb_4bit_compute_dtype": "bfloat16",
  "bnb_4bit_quant_type": "nf4",
  "bnb_4bit_use_double_quant": true,
  "bnb_4bit_quant_storage": null,
  "max_new_tokens": null,
  "temperature": null,
  "top_k": null,
  "top_p": null,
  "repetition_penalty": null,
  "num_beams": 1,
  "stream": false,
  "stop_words": [],
  "logprobs": false,
  "top_logprobs": null,
  "ckpt_dir": null,
  "load_dataset_config": null,
  "lora_modules": [],
  "tuner_backend": "peft",
  "train_type": "lora",
  "adapters": [],
  "external_plugins": [],
  "seed": 42,
  "model_kwargs": {},
  "load_args": true,
  "load_data_args": false,
  "use_hf": true,
  "hub_token": null,
  "custom_register_path": [],
  "ignore_args_error": false,
  "use_swift_lora": false,
  "merge_lora": false,
  "safe_serialization": true,
  "max_shard_size": "5GB",
  "output_dir": "/fsx/s3/cantina-text-datastore/megatron_output/Qwen3-30B-A3B-mcore/training_data_sft_v1_hf_20250512_ms_swift_json_outputs_lr2e-6/v0-20250605-204723_iter2000_hf",
  "quant_n_samples": 256,
  "quant_batch_size": 1,
  "group_size": 128,
  "to_ollama": false,
  "to_mcore": false,
  "to_hf": true,
  "mcore_model": "/fsx/s3/cantina-text-datastore/megatron_output/Qwen3-30B-A3B-mcore/training_data_sft_v1_hf_20250512_ms_swift_json_outputs_lr2e-6/v0-20250605-204723",
  "thread_count": 7,
  "test_convert_precision": false,
  "push_to_hub": false,
  "hub_model_id": null,
  "hub_private_repo": false,
  "commit_message": "update files",
  "to_peft_format": false,
  "exist_ok": false,
  "rank": 0,
  "local_rank": 0,
  "global_world_size": 1,
  "local_world_size": 1,
  "model_suffix": "Qwen3-30B-A3B",
  "model_info": "ModelInfo(model_type='qwen3_moe', model_dir='/root/.cache/huggingface/hub/models--Qwen--Qwen3-30B-A3B/snapshots/ae659febe817e4b3ebd7355f47792725801204c9', torch_dtype=torch.bfloat16, max_model_len=40960, quant_method=None, quant_bits=None, rope_scaling=None, config=None, task_type='causal_lm', num_labels=None)",
  "model_meta": "ModelMeta(model_type='qwen3_moe', model_groups=[ModelGroup(models=[Model(ms_model_id='Qwen/Qwen3-30B-A3B-Base', hf_model_id='Qwen/Qwen3-30B-A3B-Base', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-235B-A22B-Base', hf_model_id='Qwen/Qwen3-235B-A22B-Base', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-30B-A3B', hf_model_id='Qwen/Qwen3-30B-A3B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-235B-A22B', hf_model_id='Qwen/Qwen3-235B-A22B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='qwen3', get_function=<function get_model_tokenizer_with_flash_attn at 0x7fb186d6ae80>, model_arch=None, architectures=['Qwen3MoeForCausalLM'], additional_saved_files=[], torch_dtype=None, is_multimodal=False, is_reward=False, task_type=None, ignore_patterns=None, requires=['transformers>=4.51'], tags=[])",
  "model_dir": "/root/.cache/huggingface/hub/models--Qwen--Qwen3-30B-A3B/snapshots/ae659febe817e4b3ebd7355f47792725801204c9",
  "hub": "<class 'swift.hub.hub.HFHub'>"
}