{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.4678362573099415, "eval_steps": 25, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00935672514619883, "grad_norm": 4.463105201721191, "learning_rate": 2e-05, "loss": 3.2142, "step": 1 }, { "epoch": 0.00935672514619883, "eval_loss": 3.3358547687530518, "eval_runtime": 13.9023, "eval_samples_per_second": 6.474, "eval_steps_per_second": 6.474, "step": 1 }, { "epoch": 0.01871345029239766, "grad_norm": 4.7624897956848145, "learning_rate": 4e-05, "loss": 3.3766, "step": 2 }, { "epoch": 0.028070175438596492, "grad_norm": 3.991523265838623, "learning_rate": 6e-05, "loss": 3.3922, "step": 3 }, { "epoch": 0.03742690058479532, "grad_norm": 3.8783044815063477, "learning_rate": 8e-05, "loss": 3.0767, "step": 4 }, { "epoch": 0.04678362573099415, "grad_norm": 4.1902995109558105, "learning_rate": 0.0001, "loss": 3.2965, "step": 5 }, { "epoch": 0.056140350877192984, "grad_norm": 4.054313659667969, "learning_rate": 0.00012, "loss": 3.0089, "step": 6 }, { "epoch": 0.06549707602339182, "grad_norm": 4.280858993530273, "learning_rate": 0.00014, "loss": 3.0863, "step": 7 }, { "epoch": 0.07485380116959064, "grad_norm": 3.999479293823242, "learning_rate": 0.00016, "loss": 2.9925, "step": 8 }, { "epoch": 0.08421052631578947, "grad_norm": 3.4866421222686768, "learning_rate": 0.00018, "loss": 2.941, "step": 9 }, { "epoch": 0.0935672514619883, "grad_norm": 3.900364398956299, "learning_rate": 0.0002, "loss": 2.7885, "step": 10 }, { "epoch": 0.10292397660818714, "grad_norm": 3.4323623180389404, "learning_rate": 0.0001999390827019096, "loss": 2.8013, "step": 11 }, { "epoch": 0.11228070175438597, "grad_norm": 3.4795432090759277, "learning_rate": 0.00019975640502598244, "loss": 2.6806, "step": 12 }, { "epoch": 0.1216374269005848, "grad_norm": 3.2568836212158203, "learning_rate": 0.00019945218953682734, "loss": 2.6366, "step": 13 }, { "epoch": 0.13099415204678364, "grad_norm": 2.9930436611175537, "learning_rate": 0.00019902680687415705, "loss": 2.7908, "step": 14 }, { "epoch": 0.14035087719298245, "grad_norm": 2.845224380493164, "learning_rate": 0.00019848077530122083, "loss": 2.5384, "step": 15 }, { "epoch": 0.1497076023391813, "grad_norm": 2.690312385559082, "learning_rate": 0.00019781476007338058, "loss": 2.7595, "step": 16 }, { "epoch": 0.15906432748538013, "grad_norm": 3.0404510498046875, "learning_rate": 0.00019702957262759965, "loss": 2.7844, "step": 17 }, { "epoch": 0.16842105263157894, "grad_norm": 2.836411952972412, "learning_rate": 0.0001961261695938319, "loss": 2.4923, "step": 18 }, { "epoch": 0.17777777777777778, "grad_norm": 3.020054578781128, "learning_rate": 0.00019510565162951537, "loss": 2.6076, "step": 19 }, { "epoch": 0.1871345029239766, "grad_norm": 2.719973087310791, "learning_rate": 0.00019396926207859084, "loss": 2.6418, "step": 20 }, { "epoch": 0.19649122807017544, "grad_norm": 3.120027780532837, "learning_rate": 0.00019271838545667876, "loss": 2.7253, "step": 21 }, { "epoch": 0.20584795321637428, "grad_norm": 2.7467753887176514, "learning_rate": 0.0001913545457642601, "loss": 2.4353, "step": 22 }, { "epoch": 0.2152046783625731, "grad_norm": 3.3954317569732666, "learning_rate": 0.0001898794046299167, "loss": 2.6932, "step": 23 }, { "epoch": 0.22456140350877193, "grad_norm": 2.6571295261383057, "learning_rate": 0.00018829475928589271, "loss": 2.5214, "step": 24 }, { "epoch": 0.23391812865497075, "grad_norm": 2.4160666465759277, "learning_rate": 0.00018660254037844388, "loss": 2.6052, "step": 25 }, { "epoch": 0.23391812865497075, "eval_loss": 2.5258662700653076, "eval_runtime": 13.9413, "eval_samples_per_second": 6.456, "eval_steps_per_second": 6.456, "step": 25 }, { "epoch": 0.2432748538011696, "grad_norm": 2.6495888233184814, "learning_rate": 0.0001848048096156426, "loss": 2.5469, "step": 26 }, { "epoch": 0.25263157894736843, "grad_norm": 2.838324785232544, "learning_rate": 0.00018290375725550417, "loss": 2.5969, "step": 27 }, { "epoch": 0.26198830409356727, "grad_norm": 2.747297525405884, "learning_rate": 0.00018090169943749476, "loss": 2.4122, "step": 28 }, { "epoch": 0.27134502923976606, "grad_norm": 2.5374674797058105, "learning_rate": 0.00017880107536067218, "loss": 2.2964, "step": 29 }, { "epoch": 0.2807017543859649, "grad_norm": 2.8469207286834717, "learning_rate": 0.0001766044443118978, "loss": 2.6656, "step": 30 }, { "epoch": 0.29005847953216374, "grad_norm": 2.8864634037017822, "learning_rate": 0.00017431448254773944, "loss": 2.2717, "step": 31 }, { "epoch": 0.2994152046783626, "grad_norm": 2.550114393234253, "learning_rate": 0.0001719339800338651, "loss": 2.3912, "step": 32 }, { "epoch": 0.3087719298245614, "grad_norm": 2.7608063220977783, "learning_rate": 0.00016946583704589973, "loss": 2.3453, "step": 33 }, { "epoch": 0.31812865497076026, "grad_norm": 2.7784292697906494, "learning_rate": 0.00016691306063588583, "loss": 2.495, "step": 34 }, { "epoch": 0.32748538011695905, "grad_norm": 2.7670254707336426, "learning_rate": 0.00016427876096865394, "loss": 2.3523, "step": 35 }, { "epoch": 0.3368421052631579, "grad_norm": 2.92022442817688, "learning_rate": 0.0001615661475325658, "loss": 2.5027, "step": 36 }, { "epoch": 0.34619883040935673, "grad_norm": 2.6825602054595947, "learning_rate": 0.00015877852522924732, "loss": 2.5576, "step": 37 }, { "epoch": 0.35555555555555557, "grad_norm": 2.5809407234191895, "learning_rate": 0.0001559192903470747, "loss": 2.4248, "step": 38 }, { "epoch": 0.3649122807017544, "grad_norm": 2.5570051670074463, "learning_rate": 0.0001529919264233205, "loss": 2.3693, "step": 39 }, { "epoch": 0.3742690058479532, "grad_norm": 2.4310109615325928, "learning_rate": 0.00015000000000000001, "loss": 2.4687, "step": 40 }, { "epoch": 0.38362573099415204, "grad_norm": 2.647545099258423, "learning_rate": 0.00014694715627858908, "loss": 2.5393, "step": 41 }, { "epoch": 0.3929824561403509, "grad_norm": 2.5504465103149414, "learning_rate": 0.00014383711467890774, "loss": 2.3839, "step": 42 }, { "epoch": 0.4023391812865497, "grad_norm": 2.3321890830993652, "learning_rate": 0.00014067366430758004, "loss": 2.5113, "step": 43 }, { "epoch": 0.41169590643274856, "grad_norm": 2.408841371536255, "learning_rate": 0.00013746065934159123, "loss": 2.3157, "step": 44 }, { "epoch": 0.42105263157894735, "grad_norm": 2.5375423431396484, "learning_rate": 0.00013420201433256689, "loss": 2.676, "step": 45 }, { "epoch": 0.4304093567251462, "grad_norm": 2.8962650299072266, "learning_rate": 0.00013090169943749476, "loss": 2.8322, "step": 46 }, { "epoch": 0.439766081871345, "grad_norm": 2.144554615020752, "learning_rate": 0.0001275637355816999, "loss": 2.2713, "step": 47 }, { "epoch": 0.44912280701754387, "grad_norm": 2.3191635608673096, "learning_rate": 0.00012419218955996676, "loss": 2.3502, "step": 48 }, { "epoch": 0.4584795321637427, "grad_norm": 2.344190835952759, "learning_rate": 0.00012079116908177593, "loss": 2.5839, "step": 49 }, { "epoch": 0.4678362573099415, "grad_norm": 2.446531295776367, "learning_rate": 0.00011736481776669306, "loss": 2.5168, "step": 50 }, { "epoch": 0.4678362573099415, "eval_loss": 2.3896028995513916, "eval_runtime": 13.9611, "eval_samples_per_second": 6.446, "eval_steps_per_second": 6.446, "step": 50 } ], "logging_steps": 1, "max_steps": 100, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 25, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 8696642257551360.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }