qwen-3b-math-fp8-msamp-o2 / training_args.json
vinhpx's picture
Upload folder using huggingface_hub
85b53d7 verified
raw
history blame contribute delete
944 Bytes
{
"model_name": "Qwen/Qwen2.5-3B",
"dataset_name": "nvidia/OpenMathInstruct-2",
"max_length": 2048,
"max_samples": 100000,
"batch_size": 3,
"gradient_accumulation_steps": 16,
"learning_rate": 2e-05,
"num_epochs": 3,
"warmup_steps": 200,
"weight_decay": 0.01,
"seed": 42,
"max_checkpoints": 3,
"save_steps": 10000,
"eval_steps": 10000,
"output_dir": "./qwen_math_fp8_model",
"fp8_backend": "msamp",
"msamp_opt_level": "O2",
"te_fp8_format": "HYBRID",
"te_amax_history_len": 32,
"te_amax_compute_algo": "max",
"use_generated_solution": true,
"solution_field": "generated_solution",
"use_wandb": true,
"wandb_project": "qwen-math-fp8",
"wandb_entity": null,
"wandb_run_name": null,
"wandb_tags": [
"fp8",
"qwen",
"math"
],
"wandb_notes": "",
"wandb_resume": false,
"wandb_watch_model": false,
"wandb_watch_freq": 1000,
"wandb_log_freq": 10,
"wandb_log_model": false
}