Qwen2.5-1.5B-Open-R1-Distill / trainer_state.json
commit 0949db1 ("Model save")
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 100,
"global_step": 169,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.029585798816568046,
"grad_norm": 2.3596792221069336,
"learning_rate": 4.705882352941177e-06,
"loss": 1.1077,
"mean_token_accuracy": 0.7105089992284774,
"num_tokens": 2621440.0,
"step": 5
},
{
"epoch": 0.05917159763313609,
"grad_norm": 1.7160693407058716,
"learning_rate": 1.0588235294117648e-05,
"loss": 1.0353,
"mean_token_accuracy": 0.7203521847724914,
"num_tokens": 5242880.0,
"step": 10
},
{
"epoch": 0.08875739644970414,
"grad_norm": 0.8708027601242065,
"learning_rate": 1.647058823529412e-05,
"loss": 0.9553,
"mean_token_accuracy": 0.7316273808479309,
"num_tokens": 7861273.0,
"step": 15
},
{
"epoch": 0.11834319526627218,
"grad_norm": 0.6267297267913818,
"learning_rate": 1.999145758387301e-05,
"loss": 0.8899,
"mean_token_accuracy": 0.743945425748825,
"num_tokens": 10474605.0,
"step": 20
},
{
"epoch": 0.14792899408284024,
"grad_norm": 0.4869539439678192,
"learning_rate": 1.9895522933272028e-05,
"loss": 0.8455,
"mean_token_accuracy": 0.7542784661054611,
"num_tokens": 13096045.0,
"step": 25
},
{
"epoch": 0.17751479289940827,
"grad_norm": 0.4492546319961548,
"learning_rate": 1.9694002659393306e-05,
"loss": 0.8389,
"mean_token_accuracy": 0.7545322090387344,
"num_tokens": 15717485.0,
"step": 30
},
{
"epoch": 0.20710059171597633,
"grad_norm": 0.4301901161670685,
"learning_rate": 1.9389046991574298e-05,
"loss": 0.8268,
"mean_token_accuracy": 0.7568192929029465,
"num_tokens": 18338925.0,
"step": 35
},
{
"epoch": 0.23668639053254437,
"grad_norm": 0.37857791781425476,
"learning_rate": 1.898390981891979e-05,
"loss": 0.7956,
"mean_token_accuracy": 0.7649114817380905,
"num_tokens": 20960365.0,
"step": 40
},
{
"epoch": 0.26627218934911245,
"grad_norm": 0.3774871230125427,
"learning_rate": 1.8482913971175737e-05,
"loss": 0.8079,
"mean_token_accuracy": 0.7604979366064072,
"num_tokens": 23581805.0,
"step": 45
},
{
"epoch": 0.2958579881656805,
"grad_norm": 0.3935665488243103,
"learning_rate": 1.789140509396394e-05,
"loss": 0.7961,
"mean_token_accuracy": 0.7637095510959625,
"num_tokens": 26203245.0,
"step": 50
},
{
"epoch": 0.3254437869822485,
"grad_norm": 0.3560955226421356,
"learning_rate": 1.7215694610530624e-05,
"loss": 0.7946,
"mean_token_accuracy": 0.7636954367160798,
"num_tokens": 28824685.0,
"step": 55
},
{
"epoch": 0.35502958579881655,
"grad_norm": 0.37536928057670593,
"learning_rate": 1.646299237860941e-05,
"loss": 0.7938,
"mean_token_accuracy": 0.7634146034717559,
"num_tokens": 31443682.0,
"step": 60
},
{
"epoch": 0.38461538461538464,
"grad_norm": 0.3460127115249634,
"learning_rate": 1.5641329760952514e-05,
"loss": 0.7639,
"mean_token_accuracy": 0.7715859562158585,
"num_tokens": 34061327.0,
"step": 65
},
{
"epoch": 0.41420118343195267,
"grad_norm": 0.37248924374580383,
"learning_rate": 1.4759473930370738e-05,
"loss": 0.7771,
"mean_token_accuracy": 0.7680761635303497,
"num_tokens": 36682767.0,
"step": 70
},
{
"epoch": 0.4437869822485207,
"grad_norm": 0.34658947587013245,
"learning_rate": 1.3826834323650899e-05,
"loss": 0.8028,
"mean_token_accuracy": 0.7603934347629547,
"num_tokens": 39300751.0,
"step": 75
},
{
"epoch": 0.47337278106508873,
"grad_norm": 0.3706357479095459,
"learning_rate": 1.2853362242491054e-05,
"loss": 0.7738,
"mean_token_accuracy": 0.7687152832746506,
"num_tokens": 41918913.0,
"step": 80
},
{
"epoch": 0.5029585798816568,
"grad_norm": 0.3420180380344391,
"learning_rate": 1.1849444672715587e-05,
"loss": 0.779,
"mean_token_accuracy": 0.7665890276432037,
"num_tokens": 44540169.0,
"step": 85
},
{
"epoch": 0.5325443786982249,
"grad_norm": 0.3875311315059662,
"learning_rate": 1.0825793454723325e-05,
"loss": 0.7683,
"mean_token_accuracy": 0.7695774495601654,
"num_tokens": 47160969.0,
"step": 90
},
{
"epoch": 0.5621301775147929,
"grad_norm": 0.35641783475875854,
"learning_rate": 9.79333098772446e-06,
"loss": 0.7692,
"mean_token_accuracy": 0.7692601472139359,
"num_tokens": 49782409.0,
"step": 95
},
{
"epoch": 0.591715976331361,
"grad_norm": 0.3525794446468353,
"learning_rate": 8.763073687306523e-06,
"loss": 0.7853,
"step": 100
},
{
"epoch": 0.591715976331361,
"eval_loss": 0.7879331111907959,
"eval_mean_token_accuracy": 0.77275630235672,
"eval_num_tokens": 52403849.0,
"eval_runtime": 1.3797,
"eval_samples_per_second": 93.496,
"eval_steps_per_second": 3.624,
"step": 100
},
{
"epoch": 0.621301775147929,
"grad_norm": 0.31415921449661255,
"learning_rate": 7.746014439841941e-06,
"loss": 0.7483,
"mean_token_accuracy": 0.7696478188037872,
"num_tokens": 55025289.0,
"step": 105
},
{
"epoch": 0.650887573964497,
"grad_norm": 0.35946062207221985,
"learning_rate": 6.7530053079531664e-06,
"loss": 0.751,
"mean_token_accuracy": 0.773971700668335,
"num_tokens": 57641987.0,
"step": 110
},
{
"epoch": 0.6804733727810651,
"grad_norm": 0.3247712552547455,
"learning_rate": 5.794641738572925e-06,
"loss": 0.766,
"mean_token_accuracy": 0.7699923694133759,
"num_tokens": 60263427.0,
"step": 115
},
{
"epoch": 0.7100591715976331,
"grad_norm": 0.32254937291145325,
"learning_rate": 4.881149509103993e-06,
"loss": 0.7655,
"mean_token_accuracy": 0.7701400071382523,
"num_tokens": 62882216.0,
"step": 120
},
{
"epoch": 0.7396449704142012,
"grad_norm": 0.3108745515346527,
"learning_rate": 4.0222756179675915e-06,
"loss": 0.7772,
"mean_token_accuracy": 0.7666765838861466,
"num_tokens": 65503656.0,
"step": 125
},
{
"epoch": 0.7692307692307693,
"grad_norm": 0.3257134258747101,
"learning_rate": 3.2271842837425917e-06,
"loss": 0.7507,
"mean_token_accuracy": 0.7745073974132538,
"num_tokens": 68125096.0,
"step": 130
},
{
"epoch": 0.7988165680473372,
"grad_norm": 0.29293152689933777,
"learning_rate": 2.504359162588741e-06,
"loss": 0.7436,
"mean_token_accuracy": 0.7756482750177384,
"num_tokens": 70746536.0,
"step": 135
},
{
"epoch": 0.8284023668639053,
"grad_norm": 0.3014591932296753,
"learning_rate": 1.861512827298051e-06,
"loss": 0.7438,
"mean_token_accuracy": 0.7760634958744049,
"num_tokens": 73365315.0,
"step": 140
},
{
"epoch": 0.8579881656804734,
"grad_norm": 0.2900739312171936,
"learning_rate": 1.305504473836331e-06,
"loss": 0.7585,
"mean_token_accuracy": 0.7716891765594482,
"num_tokens": 75986755.0,
"step": 145
},
{
"epoch": 0.8875739644970414,
"grad_norm": 0.2970161437988281,
"learning_rate": 8.42266733449425e-07,
"loss": 0.7436,
"mean_token_accuracy": 0.7757604539394378,
"num_tokens": 78608195.0,
"step": 150
},
{
"epoch": 0.9171597633136095,
"grad_norm": 0.29888418316841125,
"learning_rate": 4.7674237125185597e-07,
"loss": 0.7513,
"mean_token_accuracy": 0.7735760033130645,
"num_tokens": 81229635.0,
"step": 155
},
{
"epoch": 0.9467455621301775,
"grad_norm": 0.28909653425216675,
"learning_rate": 2.1283154672645522e-07,
"loss": 0.7583,
"mean_token_accuracy": 0.7715224415063858,
"num_tokens": 83851075.0,
"step": 160
},
{
"epoch": 0.9763313609467456,
"grad_norm": 0.2939208447933197,
"learning_rate": 5.3350198867574424e-08,
"loss": 0.7567,
"mean_token_accuracy": 0.7722329139709473,
"num_tokens": 86472515.0,
"step": 165
},
{
"epoch": 1.0,
"mean_token_accuracy": 0.7728699259459972,
"num_tokens": 88569667.0,
"step": 169,
"total_flos": 6.966137809639834e+17,
"train_loss": 0.80340930978222,
"train_runtime": 717.7254,
"train_samples_per_second": 30.133,
"train_steps_per_second": 0.235
}
],
"logging_steps": 5,
"max_steps": 169,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 6.966137809639834e+17,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}
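
For reference, below is a minimal sketch of how the log_history in this trainer_state.json could be loaded and inspected. It assumes the file sits in the current working directory under the name trainer_state.json and that matplotlib is installed; both are assumptions, not part of the checkpoint itself. Only the standard-library json module and matplotlib are used.

# Sketch: read trainer_state.json and plot the train/eval loss curves.
# Assumes "trainer_state.json" is in the working directory (adjust the path
# for your checkpoint layout) and that matplotlib is available.
import json

import matplotlib.pyplot as plt

with open("trainer_state.json") as f:
    state = json.load(f)

# Regular training entries carry a "loss" key; the single evaluation entry
# (step 100 in this run) carries "eval_loss" instead.
train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

plt.plot(
    [e["step"] for e in train_logs],
    [e["loss"] for e in train_logs],
    label="train loss",
)
plt.scatter(
    [e["step"] for e in eval_logs],
    [e["eval_loss"] for e in eval_logs],
    color="red",
    label="eval loss",
)
plt.xlabel("step")
plt.ylabel("loss")
plt.legend()
plt.show()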