{ "best_metric": 1.542069435119629, "best_model_checkpoint": "miner_id_24/checkpoint-150", "epoch": 0.47095761381475665, "eval_steps": 50, "global_step": 150, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0031397174254317113, "grad_norm": 6.254443645477295, "learning_rate": 1.0140000000000001e-05, "loss": 1.6655, "step": 1 }, { "epoch": 0.0031397174254317113, "eval_loss": 3.4825549125671387, "eval_runtime": 3.7073, "eval_samples_per_second": 36.145, "eval_steps_per_second": 9.171, "step": 1 }, { "epoch": 0.006279434850863423, "grad_norm": 7.797785758972168, "learning_rate": 2.0280000000000002e-05, "loss": 2.381, "step": 2 }, { "epoch": 0.009419152276295133, "grad_norm": 8.442407608032227, "learning_rate": 3.0419999999999997e-05, "loss": 2.405, "step": 3 }, { "epoch": 0.012558869701726845, "grad_norm": 8.922061920166016, "learning_rate": 4.0560000000000005e-05, "loss": 2.7349, "step": 4 }, { "epoch": 0.015698587127158554, "grad_norm": 8.771944999694824, "learning_rate": 5.07e-05, "loss": 2.5019, "step": 5 }, { "epoch": 0.018838304552590265, "grad_norm": 7.558984756469727, "learning_rate": 6.0839999999999993e-05, "loss": 2.3498, "step": 6 }, { "epoch": 0.02197802197802198, "grad_norm": 7.375399589538574, "learning_rate": 7.097999999999999e-05, "loss": 1.9931, "step": 7 }, { "epoch": 0.02511773940345369, "grad_norm": 8.363454818725586, "learning_rate": 8.112000000000001e-05, "loss": 2.4119, "step": 8 }, { "epoch": 0.0282574568288854, "grad_norm": 8.049056053161621, "learning_rate": 9.126e-05, "loss": 2.1919, "step": 9 }, { "epoch": 0.03139717425431711, "grad_norm": 7.283090114593506, "learning_rate": 0.0001014, "loss": 1.694, "step": 10 }, { "epoch": 0.03453689167974882, "grad_norm": 7.796234130859375, "learning_rate": 0.00010086631578947368, "loss": 1.8361, "step": 11 }, { "epoch": 0.03767660910518053, "grad_norm": 8.134119033813477, "learning_rate": 0.00010033263157894736, "loss": 2.4029, "step": 12 }, { "epoch": 0.04081632653061224, "grad_norm": 10.318169593811035, "learning_rate": 9.979894736842105e-05, "loss": 2.4189, "step": 13 }, { "epoch": 0.04395604395604396, "grad_norm": 7.024040222167969, "learning_rate": 9.926526315789475e-05, "loss": 1.3956, "step": 14 }, { "epoch": 0.04709576138147567, "grad_norm": 7.71487283706665, "learning_rate": 9.873157894736843e-05, "loss": 1.8253, "step": 15 }, { "epoch": 0.05023547880690738, "grad_norm": 8.051153182983398, "learning_rate": 9.81978947368421e-05, "loss": 1.9171, "step": 16 }, { "epoch": 0.05337519623233909, "grad_norm": 9.367609977722168, "learning_rate": 9.766421052631579e-05, "loss": 1.918, "step": 17 }, { "epoch": 0.0565149136577708, "grad_norm": 7.217973709106445, "learning_rate": 9.713052631578947e-05, "loss": 1.5383, "step": 18 }, { "epoch": 0.059654631083202514, "grad_norm": 8.168498992919922, "learning_rate": 9.659684210526315e-05, "loss": 1.9243, "step": 19 }, { "epoch": 0.06279434850863422, "grad_norm": 9.34277629852295, "learning_rate": 9.606315789473684e-05, "loss": 1.9905, "step": 20 }, { "epoch": 0.06593406593406594, "grad_norm": 9.115056037902832, "learning_rate": 9.552947368421053e-05, "loss": 1.9055, "step": 21 }, { "epoch": 0.06907378335949764, "grad_norm": 10.160658836364746, "learning_rate": 9.499578947368422e-05, "loss": 1.8786, "step": 22 }, { "epoch": 0.07221350078492936, "grad_norm": 8.394580841064453, "learning_rate": 9.44621052631579e-05, "loss": 1.7636, "step": 23 }, { "epoch": 0.07535321821036106, "grad_norm": 7.4472832679748535, "learning_rate": 9.392842105263158e-05, "loss": 1.476, "step": 24 }, { "epoch": 0.07849293563579278, "grad_norm": 9.046372413635254, "learning_rate": 9.339473684210526e-05, "loss": 2.6342, "step": 25 }, { "epoch": 0.08163265306122448, "grad_norm": 12.512232780456543, "learning_rate": 9.286105263157894e-05, "loss": 2.0217, "step": 26 }, { "epoch": 0.0847723704866562, "grad_norm": 7.891317844390869, "learning_rate": 9.232736842105263e-05, "loss": 1.8033, "step": 27 }, { "epoch": 0.08791208791208792, "grad_norm": 8.680127143859863, "learning_rate": 9.179368421052632e-05, "loss": 1.8667, "step": 28 }, { "epoch": 0.09105180533751962, "grad_norm": 8.955038070678711, "learning_rate": 9.126e-05, "loss": 1.8639, "step": 29 }, { "epoch": 0.09419152276295134, "grad_norm": 8.710933685302734, "learning_rate": 9.072631578947368e-05, "loss": 1.9586, "step": 30 }, { "epoch": 0.09733124018838304, "grad_norm": 7.248199939727783, "learning_rate": 9.019263157894736e-05, "loss": 1.5127, "step": 31 }, { "epoch": 0.10047095761381476, "grad_norm": 7.970192909240723, "learning_rate": 8.965894736842104e-05, "loss": 1.5677, "step": 32 }, { "epoch": 0.10361067503924647, "grad_norm": 9.28384017944336, "learning_rate": 8.912526315789472e-05, "loss": 1.5312, "step": 33 }, { "epoch": 0.10675039246467818, "grad_norm": 9.90267276763916, "learning_rate": 8.859157894736842e-05, "loss": 2.2637, "step": 34 }, { "epoch": 0.10989010989010989, "grad_norm": 9.363068580627441, "learning_rate": 8.805789473684211e-05, "loss": 2.0247, "step": 35 }, { "epoch": 0.1130298273155416, "grad_norm": 9.736919403076172, "learning_rate": 8.752421052631579e-05, "loss": 1.6372, "step": 36 }, { "epoch": 0.11616954474097331, "grad_norm": 9.847123146057129, "learning_rate": 8.699052631578947e-05, "loss": 1.9869, "step": 37 }, { "epoch": 0.11930926216640503, "grad_norm": 10.517101287841797, "learning_rate": 8.645684210526315e-05, "loss": 2.3098, "step": 38 }, { "epoch": 0.12244897959183673, "grad_norm": 12.2364501953125, "learning_rate": 8.592315789473683e-05, "loss": 2.0425, "step": 39 }, { "epoch": 0.12558869701726844, "grad_norm": 8.809972763061523, "learning_rate": 8.538947368421051e-05, "loss": 2.2248, "step": 40 }, { "epoch": 0.12872841444270017, "grad_norm": 9.04055118560791, "learning_rate": 8.485578947368421e-05, "loss": 2.1993, "step": 41 }, { "epoch": 0.13186813186813187, "grad_norm": 12.418622016906738, "learning_rate": 8.43221052631579e-05, "loss": 1.8935, "step": 42 }, { "epoch": 0.13500784929356358, "grad_norm": 12.07343578338623, "learning_rate": 8.378842105263158e-05, "loss": 2.5241, "step": 43 }, { "epoch": 0.13814756671899528, "grad_norm": 12.244543075561523, "learning_rate": 8.325473684210526e-05, "loss": 1.9638, "step": 44 }, { "epoch": 0.141287284144427, "grad_norm": 11.888532638549805, "learning_rate": 8.272105263157894e-05, "loss": 2.2361, "step": 45 }, { "epoch": 0.14442700156985872, "grad_norm": 9.17966365814209, "learning_rate": 8.218736842105262e-05, "loss": 1.7368, "step": 46 }, { "epoch": 0.14756671899529042, "grad_norm": 12.1638822555542, "learning_rate": 8.165368421052632e-05, "loss": 2.0467, "step": 47 }, { "epoch": 0.15070643642072212, "grad_norm": 11.597208023071289, "learning_rate": 8.112000000000001e-05, "loss": 1.7656, "step": 48 }, { "epoch": 0.15384615384615385, "grad_norm": 11.913068771362305, "learning_rate": 8.058631578947369e-05, "loss": 2.2715, "step": 49 }, { "epoch": 0.15698587127158556, "grad_norm": 17.56863784790039, "learning_rate": 8.005263157894737e-05, "loss": 2.0193, "step": 50 }, { "epoch": 0.15698587127158556, "eval_loss": 1.7894538640975952, "eval_runtime": 3.7091, "eval_samples_per_second": 36.128, "eval_steps_per_second": 9.167, "step": 50 }, { "epoch": 0.16012558869701726, "grad_norm": 5.96769905090332, "learning_rate": 7.951894736842105e-05, "loss": 1.458, "step": 51 }, { "epoch": 0.16326530612244897, "grad_norm": 6.817239284515381, "learning_rate": 7.898526315789473e-05, "loss": 1.401, "step": 52 }, { "epoch": 0.1664050235478807, "grad_norm": 7.3476033210754395, "learning_rate": 7.845157894736841e-05, "loss": 1.8913, "step": 53 }, { "epoch": 0.1695447409733124, "grad_norm": 6.681586742401123, "learning_rate": 7.79178947368421e-05, "loss": 1.9426, "step": 54 }, { "epoch": 0.1726844583987441, "grad_norm": 6.83116340637207, "learning_rate": 7.73842105263158e-05, "loss": 2.0575, "step": 55 }, { "epoch": 0.17582417582417584, "grad_norm": 8.655893325805664, "learning_rate": 7.685052631578948e-05, "loss": 1.8735, "step": 56 }, { "epoch": 0.17896389324960754, "grad_norm": 7.9880194664001465, "learning_rate": 7.631684210526316e-05, "loss": 1.6114, "step": 57 }, { "epoch": 0.18210361067503925, "grad_norm": 5.3605055809021, "learning_rate": 7.578315789473684e-05, "loss": 1.5865, "step": 58 }, { "epoch": 0.18524332810047095, "grad_norm": 5.455999374389648, "learning_rate": 7.524947368421052e-05, "loss": 1.9276, "step": 59 }, { "epoch": 0.18838304552590268, "grad_norm": 6.868194103240967, "learning_rate": 7.47157894736842e-05, "loss": 2.0298, "step": 60 }, { "epoch": 0.19152276295133439, "grad_norm": 5.856205940246582, "learning_rate": 7.418210526315789e-05, "loss": 1.7768, "step": 61 }, { "epoch": 0.1946624803767661, "grad_norm": 5.616762161254883, "learning_rate": 7.364842105263159e-05, "loss": 1.8175, "step": 62 }, { "epoch": 0.1978021978021978, "grad_norm": 6.230925559997559, "learning_rate": 7.311473684210527e-05, "loss": 1.3076, "step": 63 }, { "epoch": 0.20094191522762953, "grad_norm": 6.4075798988342285, "learning_rate": 7.258105263157895e-05, "loss": 1.6211, "step": 64 }, { "epoch": 0.20408163265306123, "grad_norm": 5.425053119659424, "learning_rate": 7.204736842105263e-05, "loss": 1.5966, "step": 65 }, { "epoch": 0.20722135007849293, "grad_norm": 6.285729885101318, "learning_rate": 7.151368421052631e-05, "loss": 1.9486, "step": 66 }, { "epoch": 0.21036106750392464, "grad_norm": 8.634322166442871, "learning_rate": 7.097999999999999e-05, "loss": 1.6922, "step": 67 }, { "epoch": 0.21350078492935637, "grad_norm": 7.055955410003662, "learning_rate": 7.044631578947368e-05, "loss": 2.0679, "step": 68 }, { "epoch": 0.21664050235478807, "grad_norm": 6.999521732330322, "learning_rate": 6.991263157894738e-05, "loss": 2.0098, "step": 69 }, { "epoch": 0.21978021978021978, "grad_norm": 6.568216800689697, "learning_rate": 6.937894736842106e-05, "loss": 1.6266, "step": 70 }, { "epoch": 0.22291993720565148, "grad_norm": 6.6800689697265625, "learning_rate": 6.884526315789474e-05, "loss": 1.672, "step": 71 }, { "epoch": 0.2260596546310832, "grad_norm": 6.454991340637207, "learning_rate": 6.831157894736842e-05, "loss": 1.5975, "step": 72 }, { "epoch": 0.22919937205651492, "grad_norm": 5.533199310302734, "learning_rate": 6.77778947368421e-05, "loss": 1.2333, "step": 73 }, { "epoch": 0.23233908948194662, "grad_norm": 5.433882713317871, "learning_rate": 6.724421052631579e-05, "loss": 1.8005, "step": 74 }, { "epoch": 0.23547880690737832, "grad_norm": 7.069916248321533, "learning_rate": 6.671052631578948e-05, "loss": 1.8686, "step": 75 }, { "epoch": 0.23861852433281006, "grad_norm": 6.151008605957031, "learning_rate": 6.617684210526316e-05, "loss": 1.4558, "step": 76 }, { "epoch": 0.24175824175824176, "grad_norm": 5.1850104331970215, "learning_rate": 6.564315789473684e-05, "loss": 1.3295, "step": 77 }, { "epoch": 0.24489795918367346, "grad_norm": 5.7544331550598145, "learning_rate": 6.510947368421052e-05, "loss": 1.3047, "step": 78 }, { "epoch": 0.24803767660910517, "grad_norm": 6.109000205993652, "learning_rate": 6.45757894736842e-05, "loss": 1.4995, "step": 79 }, { "epoch": 0.25117739403453687, "grad_norm": 6.1000566482543945, "learning_rate": 6.404210526315789e-05, "loss": 1.1538, "step": 80 }, { "epoch": 0.2543171114599686, "grad_norm": 6.75916862487793, "learning_rate": 6.350842105263158e-05, "loss": 1.6112, "step": 81 }, { "epoch": 0.25745682888540034, "grad_norm": 5.901347637176514, "learning_rate": 6.297473684210527e-05, "loss": 1.2695, "step": 82 }, { "epoch": 0.260596546310832, "grad_norm": 6.797555923461914, "learning_rate": 6.244105263157895e-05, "loss": 1.7123, "step": 83 }, { "epoch": 0.26373626373626374, "grad_norm": 8.325261116027832, "learning_rate": 6.190736842105263e-05, "loss": 1.5652, "step": 84 }, { "epoch": 0.2668759811616955, "grad_norm": 7.468142509460449, "learning_rate": 6.137368421052631e-05, "loss": 1.85, "step": 85 }, { "epoch": 0.27001569858712715, "grad_norm": 6.591063499450684, "learning_rate": 6.0839999999999993e-05, "loss": 1.2159, "step": 86 }, { "epoch": 0.2731554160125589, "grad_norm": 7.766685962677002, "learning_rate": 6.030631578947368e-05, "loss": 1.9582, "step": 87 }, { "epoch": 0.27629513343799056, "grad_norm": 7.326028347015381, "learning_rate": 5.977263157894736e-05, "loss": 1.6978, "step": 88 }, { "epoch": 0.2794348508634223, "grad_norm": 7.96261739730835, "learning_rate": 5.9238947368421054e-05, "loss": 1.7344, "step": 89 }, { "epoch": 0.282574568288854, "grad_norm": 6.848148822784424, "learning_rate": 5.870526315789474e-05, "loss": 1.5061, "step": 90 }, { "epoch": 0.2857142857142857, "grad_norm": 6.8328752517700195, "learning_rate": 5.817157894736842e-05, "loss": 1.7432, "step": 91 }, { "epoch": 0.28885400313971743, "grad_norm": 11.828781127929688, "learning_rate": 5.76378947368421e-05, "loss": 2.2161, "step": 92 }, { "epoch": 0.29199372056514916, "grad_norm": 8.98495101928711, "learning_rate": 5.710421052631579e-05, "loss": 1.9033, "step": 93 }, { "epoch": 0.29513343799058084, "grad_norm": 16.75680160522461, "learning_rate": 5.657052631578947e-05, "loss": 2.0846, "step": 94 }, { "epoch": 0.29827315541601257, "grad_norm": 10.899314880371094, "learning_rate": 5.603684210526316e-05, "loss": 1.7966, "step": 95 }, { "epoch": 0.30141287284144425, "grad_norm": 8.247507095336914, "learning_rate": 5.550315789473684e-05, "loss": 1.4718, "step": 96 }, { "epoch": 0.304552590266876, "grad_norm": 10.83910846710205, "learning_rate": 5.496947368421053e-05, "loss": 1.6901, "step": 97 }, { "epoch": 0.3076923076923077, "grad_norm": 9.814567565917969, "learning_rate": 5.443578947368421e-05, "loss": 2.1697, "step": 98 }, { "epoch": 0.3108320251177394, "grad_norm": 11.087676048278809, "learning_rate": 5.390210526315789e-05, "loss": 1.9234, "step": 99 }, { "epoch": 0.3139717425431711, "grad_norm": 16.154081344604492, "learning_rate": 5.336842105263158e-05, "loss": 2.2297, "step": 100 }, { "epoch": 0.3139717425431711, "eval_loss": 1.6598618030548096, "eval_runtime": 3.7012, "eval_samples_per_second": 36.204, "eval_steps_per_second": 9.186, "step": 100 }, { "epoch": 0.31711145996860285, "grad_norm": 3.5819578170776367, "learning_rate": 5.283473684210526e-05, "loss": 1.0256, "step": 101 }, { "epoch": 0.3202511773940345, "grad_norm": 6.239628314971924, "learning_rate": 5.230105263157895e-05, "loss": 2.0302, "step": 102 }, { "epoch": 0.32339089481946626, "grad_norm": 6.583393573760986, "learning_rate": 5.176736842105263e-05, "loss": 1.8993, "step": 103 }, { "epoch": 0.32653061224489793, "grad_norm": 6.852742671966553, "learning_rate": 5.123368421052632e-05, "loss": 1.5832, "step": 104 }, { "epoch": 0.32967032967032966, "grad_norm": 6.229090690612793, "learning_rate": 5.07e-05, "loss": 1.7996, "step": 105 }, { "epoch": 0.3328100470957614, "grad_norm": 4.9963908195495605, "learning_rate": 5.016631578947368e-05, "loss": 1.3517, "step": 106 }, { "epoch": 0.3359497645211931, "grad_norm": 6.220682144165039, "learning_rate": 4.963263157894737e-05, "loss": 2.3754, "step": 107 }, { "epoch": 0.3390894819466248, "grad_norm": 5.58546781539917, "learning_rate": 4.909894736842105e-05, "loss": 1.9693, "step": 108 }, { "epoch": 0.34222919937205654, "grad_norm": 5.197764873504639, "learning_rate": 4.8565263157894734e-05, "loss": 1.1676, "step": 109 }, { "epoch": 0.3453689167974882, "grad_norm": 6.040829181671143, "learning_rate": 4.803157894736842e-05, "loss": 1.9262, "step": 110 }, { "epoch": 0.34850863422291994, "grad_norm": 6.036067485809326, "learning_rate": 4.749789473684211e-05, "loss": 1.875, "step": 111 }, { "epoch": 0.3516483516483517, "grad_norm": 5.573509693145752, "learning_rate": 4.696421052631579e-05, "loss": 1.5932, "step": 112 }, { "epoch": 0.35478806907378335, "grad_norm": 5.3029632568359375, "learning_rate": 4.643052631578947e-05, "loss": 1.3575, "step": 113 }, { "epoch": 0.3579277864992151, "grad_norm": 5.437764644622803, "learning_rate": 4.589684210526316e-05, "loss": 1.6235, "step": 114 }, { "epoch": 0.36106750392464676, "grad_norm": 4.923410415649414, "learning_rate": 4.536315789473684e-05, "loss": 1.3825, "step": 115 }, { "epoch": 0.3642072213500785, "grad_norm": 6.01725435256958, "learning_rate": 4.482947368421052e-05, "loss": 1.4735, "step": 116 }, { "epoch": 0.3673469387755102, "grad_norm": 6.325821876525879, "learning_rate": 4.429578947368421e-05, "loss": 1.6234, "step": 117 }, { "epoch": 0.3704866562009419, "grad_norm": 5.018939018249512, "learning_rate": 4.3762105263157896e-05, "loss": 1.7545, "step": 118 }, { "epoch": 0.37362637362637363, "grad_norm": 5.737129211425781, "learning_rate": 4.3228421052631576e-05, "loss": 1.7748, "step": 119 }, { "epoch": 0.37676609105180536, "grad_norm": 5.9904866218566895, "learning_rate": 4.269473684210526e-05, "loss": 1.4207, "step": 120 }, { "epoch": 0.37990580847723704, "grad_norm": 7.162813186645508, "learning_rate": 4.216105263157895e-05, "loss": 1.5775, "step": 121 }, { "epoch": 0.38304552590266877, "grad_norm": 4.571135520935059, "learning_rate": 4.162736842105263e-05, "loss": 1.6661, "step": 122 }, { "epoch": 0.38618524332810045, "grad_norm": 5.065517902374268, "learning_rate": 4.109368421052631e-05, "loss": 1.5862, "step": 123 }, { "epoch": 0.3893249607535322, "grad_norm": 7.399703025817871, "learning_rate": 4.0560000000000005e-05, "loss": 2.1326, "step": 124 }, { "epoch": 0.3924646781789639, "grad_norm": 5.505315780639648, "learning_rate": 4.0026315789473685e-05, "loss": 1.265, "step": 125 }, { "epoch": 0.3956043956043956, "grad_norm": 6.12184476852417, "learning_rate": 3.9492631578947365e-05, "loss": 1.5402, "step": 126 }, { "epoch": 0.3987441130298273, "grad_norm": 6.294252395629883, "learning_rate": 3.895894736842105e-05, "loss": 1.5752, "step": 127 }, { "epoch": 0.40188383045525905, "grad_norm": 6.789506435394287, "learning_rate": 3.842526315789474e-05, "loss": 1.9103, "step": 128 }, { "epoch": 0.4050235478806907, "grad_norm": 6.3864006996154785, "learning_rate": 3.789157894736842e-05, "loss": 1.7966, "step": 129 }, { "epoch": 0.40816326530612246, "grad_norm": 7.398538589477539, "learning_rate": 3.73578947368421e-05, "loss": 1.9872, "step": 130 }, { "epoch": 0.41130298273155413, "grad_norm": 7.464357376098633, "learning_rate": 3.682421052631579e-05, "loss": 1.8161, "step": 131 }, { "epoch": 0.41444270015698587, "grad_norm": 7.386998176574707, "learning_rate": 3.6290526315789474e-05, "loss": 1.5991, "step": 132 }, { "epoch": 0.4175824175824176, "grad_norm": 7.29685115814209, "learning_rate": 3.5756842105263154e-05, "loss": 1.7373, "step": 133 }, { "epoch": 0.4207221350078493, "grad_norm": 6.442423343658447, "learning_rate": 3.522315789473684e-05, "loss": 1.5062, "step": 134 }, { "epoch": 0.423861852433281, "grad_norm": 7.622251987457275, "learning_rate": 3.468947368421053e-05, "loss": 1.6362, "step": 135 }, { "epoch": 0.42700156985871274, "grad_norm": 5.391417503356934, "learning_rate": 3.415578947368421e-05, "loss": 1.3335, "step": 136 }, { "epoch": 0.4301412872841444, "grad_norm": 7.495037078857422, "learning_rate": 3.3622105263157895e-05, "loss": 2.1399, "step": 137 }, { "epoch": 0.43328100470957615, "grad_norm": 8.007742881774902, "learning_rate": 3.308842105263158e-05, "loss": 1.8728, "step": 138 }, { "epoch": 0.4364207221350079, "grad_norm": 5.115504741668701, "learning_rate": 3.255473684210526e-05, "loss": 1.3832, "step": 139 }, { "epoch": 0.43956043956043955, "grad_norm": 7.03964376449585, "learning_rate": 3.202105263157894e-05, "loss": 1.8599, "step": 140 }, { "epoch": 0.4427001569858713, "grad_norm": 7.27022647857666, "learning_rate": 3.1487368421052636e-05, "loss": 1.5044, "step": 141 }, { "epoch": 0.44583987441130296, "grad_norm": 8.688240051269531, "learning_rate": 3.0953684210526317e-05, "loss": 1.9889, "step": 142 }, { "epoch": 0.4489795918367347, "grad_norm": 7.870269298553467, "learning_rate": 3.0419999999999997e-05, "loss": 1.5771, "step": 143 }, { "epoch": 0.4521193092621664, "grad_norm": 6.881030082702637, "learning_rate": 2.988631578947368e-05, "loss": 1.2406, "step": 144 }, { "epoch": 0.4552590266875981, "grad_norm": 7.38179349899292, "learning_rate": 2.935263157894737e-05, "loss": 1.5986, "step": 145 }, { "epoch": 0.45839874411302983, "grad_norm": 7.3739800453186035, "learning_rate": 2.881894736842105e-05, "loss": 1.5282, "step": 146 }, { "epoch": 0.46153846153846156, "grad_norm": 8.048142433166504, "learning_rate": 2.8285263157894735e-05, "loss": 1.7917, "step": 147 }, { "epoch": 0.46467817896389324, "grad_norm": 8.414251327514648, "learning_rate": 2.775157894736842e-05, "loss": 2.0077, "step": 148 }, { "epoch": 0.46781789638932497, "grad_norm": 10.131597518920898, "learning_rate": 2.7217894736842105e-05, "loss": 2.0688, "step": 149 }, { "epoch": 0.47095761381475665, "grad_norm": 13.612863540649414, "learning_rate": 2.668421052631579e-05, "loss": 1.7537, "step": 150 }, { "epoch": 0.47095761381475665, "eval_loss": 1.542069435119629, "eval_runtime": 3.6868, "eval_samples_per_second": 36.346, "eval_steps_per_second": 9.222, "step": 150 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2898180911923200.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }