vinhpx
/

qwen-3b-math-fp8-msamp-o2

Model card Files Files and versions

qwen-3b-math-fp8-msamp-o2 / training_args.json

vinhpx's picture

Upload folder using huggingface_hub

85b53d7 verified 4 months ago

history blame contribute delete

944 Bytes

	{
	"model_name": "Qwen/Qwen2.5-3B",
	"dataset_name": "nvidia/OpenMathInstruct-2",
	"max_length": 2048,
	"max_samples": 100000,
	"batch_size": 3,
	"gradient_accumulation_steps": 16,
	"learning_rate": 2e-05,
	"num_epochs": 3,
	"warmup_steps": 200,
	"weight_decay": 0.01,
	"seed": 42,
	"max_checkpoints": 3,
	"save_steps": 10000,
	"eval_steps": 10000,
	"output_dir": "./qwen_math_fp8_model",
	"fp8_backend": "msamp",
	"msamp_opt_level": "O2",
	"te_fp8_format": "HYBRID",
	"te_amax_history_len": 32,
	"te_amax_compute_algo": "max",
	"use_generated_solution": true,
	"solution_field": "generated_solution",
	"use_wandb": true,
	"wandb_project": "qwen-math-fp8",
	"wandb_entity": null,
	"wandb_run_name": null,
	"wandb_tags": [
	"fp8",
	"qwen",
	"math"
	],
	"wandb_notes": "",
	"wandb_resume": false,
	"wandb_watch_model": false,
	"wandb_watch_freq": 1000,
	"wandb_log_freq": 10,
	"wandb_log_model": false
	}