{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.980269989615784, "eval_steps": 500, "global_step": 120, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.016614745586708203, "grad_norm": 0.061364494264125824, "learning_rate": 4.999991432639962e-05, "loss": 0.5857, "num_input_tokens_seen": 70408, "step": 1 }, { "epoch": 0.033229491173416406, "grad_norm": 0.0613991804420948, "learning_rate": 4.999965730618567e-05, "loss": 0.5331, "num_input_tokens_seen": 139640, "step": 2 }, { "epoch": 0.04984423676012461, "grad_norm": 0.06351307034492493, "learning_rate": 4.9999228941119745e-05, "loss": 0.5852, "num_input_tokens_seen": 223656, "step": 3 }, { "epoch": 0.06645898234683281, "grad_norm": 0.05762802064418793, "learning_rate": 4.999862923413781e-05, "loss": 0.5384, "num_input_tokens_seen": 300688, "step": 4 }, { "epoch": 0.08307372793354102, "grad_norm": 0.0632179006934166, "learning_rate": 4.999785818935018e-05, "loss": 0.5273, "num_input_tokens_seen": 366368, "step": 5 }, { "epoch": 0.09968847352024922, "grad_norm": 0.056689903140068054, "learning_rate": 4.999691581204152e-05, "loss": 0.5145, "num_input_tokens_seen": 445808, "step": 6 }, { "epoch": 0.11630321910695743, "grad_norm": 0.06574171781539917, "learning_rate": 4.9995802108670775e-05, "loss": 0.5301, "num_input_tokens_seen": 522800, "step": 7 }, { "epoch": 0.13291796469366562, "grad_norm": 0.06367070972919464, "learning_rate": 4.999451708687114e-05, "loss": 0.5552, "num_input_tokens_seen": 599608, "step": 8 }, { "epoch": 0.14953271028037382, "grad_norm": 0.0585966520011425, "learning_rate": 4.9993060755450015e-05, "loss": 0.5999, "num_input_tokens_seen": 681424, "step": 9 }, { "epoch": 0.16614745586708204, "grad_norm": 0.05650574713945389, "learning_rate": 4.999143312438893e-05, "loss": 0.4535, "num_input_tokens_seen": 756744, "step": 10 }, { "epoch": 0.18276220145379024, "grad_norm": 0.05954223498702049, "learning_rate": 4.998963420484349e-05, "loss": 0.4674, "num_input_tokens_seen": 842576, "step": 11 }, { "epoch": 0.19937694704049844, "grad_norm": 0.0663776770234108, "learning_rate": 4.998766400914329e-05, "loss": 0.4703, "num_input_tokens_seen": 917232, "step": 12 }, { "epoch": 0.21599169262720663, "grad_norm": 0.056374579668045044, "learning_rate": 4.9985522550791825e-05, "loss": 0.3725, "num_input_tokens_seen": 1006800, "step": 13 }, { "epoch": 0.23260643821391486, "grad_norm": 0.06437493115663528, "learning_rate": 4.998320984446641e-05, "loss": 0.4653, "num_input_tokens_seen": 1085824, "step": 14 }, { "epoch": 0.24922118380062305, "grad_norm": 0.06560757756233215, "learning_rate": 4.9980725906018074e-05, "loss": 0.5026, "num_input_tokens_seen": 1164160, "step": 15 }, { "epoch": 0.26583592938733125, "grad_norm": 0.06942517310380936, "learning_rate": 4.997807075247146e-05, "loss": 0.5401, "num_input_tokens_seen": 1242264, "step": 16 }, { "epoch": 0.2824506749740395, "grad_norm": 0.06349828094244003, "learning_rate": 4.997524440202469e-05, "loss": 0.4713, "num_input_tokens_seen": 1325904, "step": 17 }, { "epoch": 0.29906542056074764, "grad_norm": 0.08846385776996613, "learning_rate": 4.9972246874049254e-05, "loss": 0.5834, "num_input_tokens_seen": 1385632, "step": 18 }, { "epoch": 0.31568016614745587, "grad_norm": 0.062130190432071686, "learning_rate": 4.996907818908987e-05, "loss": 0.4045, "num_input_tokens_seen": 1470632, "step": 19 }, { "epoch": 0.3322949117341641, "grad_norm": 0.07743565738201141, "learning_rate": 4.996573836886435e-05, "loss": 0.5283, "num_input_tokens_seen": 1547536, "step": 20 }, { "epoch": 0.34890965732087226, "grad_norm": 0.06756695359945297, "learning_rate": 4.9962227436263453e-05, "loss": 0.4199, "num_input_tokens_seen": 1615528, "step": 21 }, { "epoch": 0.3655244029075805, "grad_norm": 0.08662309497594833, "learning_rate": 4.995854541535071e-05, "loss": 0.4775, "num_input_tokens_seen": 1694352, "step": 22 }, { "epoch": 0.3821391484942887, "grad_norm": 0.08380820602178574, "learning_rate": 4.9954692331362294e-05, "loss": 0.4871, "num_input_tokens_seen": 1753776, "step": 23 }, { "epoch": 0.3987538940809969, "grad_norm": 0.09967435896396637, "learning_rate": 4.995066821070679e-05, "loss": 0.4871, "num_input_tokens_seen": 1809048, "step": 24 }, { "epoch": 0.4153686396677051, "grad_norm": 0.0871267095208168, "learning_rate": 4.994647308096509e-05, "loss": 0.5461, "num_input_tokens_seen": 1884264, "step": 25 }, { "epoch": 0.43198338525441327, "grad_norm": 0.065020851790905, "learning_rate": 4.994210697089014e-05, "loss": 0.405, "num_input_tokens_seen": 1981704, "step": 26 }, { "epoch": 0.4485981308411215, "grad_norm": 0.09853450953960419, "learning_rate": 4.9937569910406756e-05, "loss": 0.4487, "num_input_tokens_seen": 2044144, "step": 27 }, { "epoch": 0.4652128764278297, "grad_norm": 0.08763110637664795, "learning_rate": 4.9932861930611454e-05, "loss": 0.3946, "num_input_tokens_seen": 2107584, "step": 28 }, { "epoch": 0.4818276220145379, "grad_norm": 0.08950547873973846, "learning_rate": 4.9927983063772196e-05, "loss": 0.4257, "num_input_tokens_seen": 2169248, "step": 29 }, { "epoch": 0.4984423676012461, "grad_norm": 0.09980211406946182, "learning_rate": 4.99229333433282e-05, "loss": 0.3911, "num_input_tokens_seen": 2230344, "step": 30 }, { "epoch": 0.5150571131879543, "grad_norm": 0.092055544257164, "learning_rate": 4.9917712803889674e-05, "loss": 0.3749, "num_input_tokens_seen": 2302368, "step": 31 }, { "epoch": 0.5316718587746625, "grad_norm": 0.10067818313837051, "learning_rate": 4.991232148123761e-05, "loss": 0.4761, "num_input_tokens_seen": 2369984, "step": 32 }, { "epoch": 0.5482866043613707, "grad_norm": 0.0717971920967102, "learning_rate": 4.990675941232353e-05, "loss": 0.4328, "num_input_tokens_seen": 2453032, "step": 33 }, { "epoch": 0.564901349948079, "grad_norm": 0.07436250895261765, "learning_rate": 4.990102663526924e-05, "loss": 0.417, "num_input_tokens_seen": 2527464, "step": 34 }, { "epoch": 0.5815160955347871, "grad_norm": 0.09256689995527267, "learning_rate": 4.989512318936655e-05, "loss": 0.4097, "num_input_tokens_seen": 2597032, "step": 35 }, { "epoch": 0.5981308411214953, "grad_norm": 0.09964177012443542, "learning_rate": 4.9889049115077005e-05, "loss": 0.4065, "num_input_tokens_seen": 2671704, "step": 36 }, { "epoch": 0.6147455867082036, "grad_norm": 0.06627887487411499, "learning_rate": 4.988280445403164e-05, "loss": 0.4136, "num_input_tokens_seen": 2767640, "step": 37 }, { "epoch": 0.6313603322949117, "grad_norm": 0.0746045857667923, "learning_rate": 4.987638924903067e-05, "loss": 0.4125, "num_input_tokens_seen": 2843720, "step": 38 }, { "epoch": 0.6479750778816199, "grad_norm": 0.0795741006731987, "learning_rate": 4.9869803544043166e-05, "loss": 0.3135, "num_input_tokens_seen": 2921472, "step": 39 }, { "epoch": 0.6645898234683282, "grad_norm": 0.08914181590080261, "learning_rate": 4.9863047384206835e-05, "loss": 0.4549, "num_input_tokens_seen": 2998400, "step": 40 }, { "epoch": 0.6812045690550363, "grad_norm": 0.11220043897628784, "learning_rate": 4.985612081582764e-05, "loss": 0.4135, "num_input_tokens_seen": 3059648, "step": 41 }, { "epoch": 0.6978193146417445, "grad_norm": 0.08390027284622192, "learning_rate": 4.98490238863795e-05, "loss": 0.3538, "num_input_tokens_seen": 3140184, "step": 42 }, { "epoch": 0.7144340602284528, "grad_norm": 0.08858532458543777, "learning_rate": 4.984175664450397e-05, "loss": 0.3644, "num_input_tokens_seen": 3207184, "step": 43 }, { "epoch": 0.731048805815161, "grad_norm": 0.07439564168453217, "learning_rate": 4.983431914000991e-05, "loss": 0.4019, "num_input_tokens_seen": 3292344, "step": 44 }, { "epoch": 0.7476635514018691, "grad_norm": 0.08694300055503845, "learning_rate": 4.982671142387316e-05, "loss": 0.4238, "num_input_tokens_seen": 3365384, "step": 45 }, { "epoch": 0.7642782969885774, "grad_norm": 0.0867784395813942, "learning_rate": 4.981893354823614e-05, "loss": 0.3702, "num_input_tokens_seen": 3440720, "step": 46 }, { "epoch": 0.7808930425752856, "grad_norm": 0.06278439611196518, "learning_rate": 4.9810985566407544e-05, "loss": 0.3354, "num_input_tokens_seen": 3533576, "step": 47 }, { "epoch": 0.7975077881619937, "grad_norm": 0.08999717980623245, "learning_rate": 4.980286753286195e-05, "loss": 0.4981, "num_input_tokens_seen": 3599744, "step": 48 }, { "epoch": 0.814122533748702, "grad_norm": 0.07938859611749649, "learning_rate": 4.979457950323945e-05, "loss": 0.4016, "num_input_tokens_seen": 3689520, "step": 49 }, { "epoch": 0.8307372793354102, "grad_norm": 0.1045590192079544, "learning_rate": 4.9786121534345265e-05, "loss": 0.388, "num_input_tokens_seen": 3751808, "step": 50 }, { "epoch": 0.8473520249221184, "grad_norm": 0.07890618592500687, "learning_rate": 4.9777493684149375e-05, "loss": 0.3674, "num_input_tokens_seen": 3839096, "step": 51 }, { "epoch": 0.8639667705088265, "grad_norm": 0.07802557945251465, "learning_rate": 4.976869601178609e-05, "loss": 0.4147, "num_input_tokens_seen": 3919824, "step": 52 }, { "epoch": 0.8805815160955348, "grad_norm": 0.0913538783788681, "learning_rate": 4.975972857755369e-05, "loss": 0.2978, "num_input_tokens_seen": 3989312, "step": 53 }, { "epoch": 0.897196261682243, "grad_norm": 0.08525951951742172, "learning_rate": 4.975059144291394e-05, "loss": 0.3923, "num_input_tokens_seen": 4060528, "step": 54 }, { "epoch": 0.9138110072689511, "grad_norm": 0.08649709820747375, "learning_rate": 4.974128467049176e-05, "loss": 0.3282, "num_input_tokens_seen": 4129368, "step": 55 }, { "epoch": 0.9304257528556594, "grad_norm": 0.11635593324899673, "learning_rate": 4.9731808324074717e-05, "loss": 0.3403, "num_input_tokens_seen": 4175208, "step": 56 }, { "epoch": 0.9470404984423676, "grad_norm": 0.1115177720785141, "learning_rate": 4.972216246861262e-05, "loss": 0.3191, "num_input_tokens_seen": 4218096, "step": 57 }, { "epoch": 0.9636552440290758, "grad_norm": 0.0986371859908104, "learning_rate": 4.971234717021709e-05, "loss": 0.3745, "num_input_tokens_seen": 4275968, "step": 58 }, { "epoch": 0.980269989615784, "grad_norm": 0.07860780507326126, "learning_rate": 4.9702362496161085e-05, "loss": 0.3129, "num_input_tokens_seen": 4346616, "step": 59 }, { "epoch": 0.9968847352024922, "grad_norm": 0.08581527322530746, "learning_rate": 4.9692208514878444e-05, "loss": 0.3324, "num_input_tokens_seen": 4425064, "step": 60 }, { "epoch": 1.0, "grad_norm": 0.16779834032058716, "learning_rate": 4.968188529596342e-05, "loss": 0.2814, "num_input_tokens_seen": 4435328, "step": 61 }, { "epoch": 1.0166147455867083, "grad_norm": 0.08948636800050735, "learning_rate": 4.9671392910170185e-05, "loss": 0.3467, "num_input_tokens_seen": 4500104, "step": 62 }, { "epoch": 1.0332294911734163, "grad_norm": 0.07826830446720123, "learning_rate": 4.966073142941239e-05, "loss": 0.3892, "num_input_tokens_seen": 4581976, "step": 63 }, { "epoch": 1.0498442367601246, "grad_norm": 0.08562575280666351, "learning_rate": 4.964990092676263e-05, "loss": 0.3354, "num_input_tokens_seen": 4652160, "step": 64 }, { "epoch": 1.066458982346833, "grad_norm": 0.1057090312242508, "learning_rate": 4.9638901476451946e-05, "loss": 0.3457, "num_input_tokens_seen": 4709368, "step": 65 }, { "epoch": 1.083073727933541, "grad_norm": 0.08131146430969238, "learning_rate": 4.962773315386935e-05, "loss": 0.3672, "num_input_tokens_seen": 4798256, "step": 66 }, { "epoch": 1.0996884735202492, "grad_norm": 0.09464936703443527, "learning_rate": 4.961639603556127e-05, "loss": 0.3157, "num_input_tokens_seen": 4859200, "step": 67 }, { "epoch": 1.1163032191069575, "grad_norm": 0.0999661460518837, "learning_rate": 4.960489019923105e-05, "loss": 0.3968, "num_input_tokens_seen": 4925992, "step": 68 }, { "epoch": 1.1329179646936656, "grad_norm": 0.09851639717817307, "learning_rate": 4.9593215723738404e-05, "loss": 0.329, "num_input_tokens_seen": 4998808, "step": 69 }, { "epoch": 1.1495327102803738, "grad_norm": 0.08382592350244522, "learning_rate": 4.958137268909887e-05, "loss": 0.2856, "num_input_tokens_seen": 5089672, "step": 70 }, { "epoch": 1.1661474558670821, "grad_norm": 0.09073847532272339, "learning_rate": 4.9569361176483286e-05, "loss": 0.3512, "num_input_tokens_seen": 5166744, "step": 71 }, { "epoch": 1.1827622014537902, "grad_norm": 0.10290185362100601, "learning_rate": 4.9557181268217227e-05, "loss": 0.4263, "num_input_tokens_seen": 5228264, "step": 72 }, { "epoch": 1.1993769470404985, "grad_norm": 0.07421435415744781, "learning_rate": 4.9544833047780394e-05, "loss": 0.3126, "num_input_tokens_seen": 5338224, "step": 73 }, { "epoch": 1.2159916926272065, "grad_norm": 0.10284842550754547, "learning_rate": 4.9532316599806124e-05, "loss": 0.3473, "num_input_tokens_seen": 5399848, "step": 74 }, { "epoch": 1.2326064382139148, "grad_norm": 0.10817047953605652, "learning_rate": 4.951963201008076e-05, "loss": 0.3275, "num_input_tokens_seen": 5468624, "step": 75 }, { "epoch": 1.249221183800623, "grad_norm": 0.09662210941314697, "learning_rate": 4.9506779365543046e-05, "loss": 0.3296, "num_input_tokens_seen": 5536776, "step": 76 }, { "epoch": 1.2658359293873311, "grad_norm": 0.11193853616714478, "learning_rate": 4.949375875428357e-05, "loss": 0.3605, "num_input_tokens_seen": 5609296, "step": 77 }, { "epoch": 1.2824506749740394, "grad_norm": 0.11866679787635803, "learning_rate": 4.9480570265544144e-05, "loss": 0.3133, "num_input_tokens_seen": 5663824, "step": 78 }, { "epoch": 1.2990654205607477, "grad_norm": 0.09865846484899521, "learning_rate": 4.94672139897172e-05, "loss": 0.3464, "num_input_tokens_seen": 5742032, "step": 79 }, { "epoch": 1.3156801661474558, "grad_norm": 0.09930054098367691, "learning_rate": 4.9453690018345144e-05, "loss": 0.3346, "num_input_tokens_seen": 5816864, "step": 80 }, { "epoch": 1.332294911734164, "grad_norm": 0.1085321381688118, "learning_rate": 4.943999844411977e-05, "loss": 0.3102, "num_input_tokens_seen": 5881624, "step": 81 }, { "epoch": 1.3489096573208723, "grad_norm": 0.08012478053569794, "learning_rate": 4.94261393608816e-05, "loss": 0.2853, "num_input_tokens_seen": 5970272, "step": 82 }, { "epoch": 1.3655244029075804, "grad_norm": 0.10291877388954163, "learning_rate": 4.941211286361922e-05, "loss": 0.3038, "num_input_tokens_seen": 6058752, "step": 83 }, { "epoch": 1.3821391484942886, "grad_norm": 0.11999356001615524, "learning_rate": 4.939791904846869e-05, "loss": 0.3283, "num_input_tokens_seen": 6120064, "step": 84 }, { "epoch": 1.398753894080997, "grad_norm": 0.10502559691667557, "learning_rate": 4.938355801271282e-05, "loss": 0.321, "num_input_tokens_seen": 6182072, "step": 85 }, { "epoch": 1.415368639667705, "grad_norm": 0.12620873749256134, "learning_rate": 4.936902985478055e-05, "loss": 0.3296, "num_input_tokens_seen": 6269680, "step": 86 }, { "epoch": 1.4319833852544133, "grad_norm": 0.13212910294532776, "learning_rate": 4.935433467424624e-05, "loss": 0.3225, "num_input_tokens_seen": 6347424, "step": 87 }, { "epoch": 1.4485981308411215, "grad_norm": 0.11600925773382187, "learning_rate": 4.933947257182901e-05, "loss": 0.3479, "num_input_tokens_seen": 6412584, "step": 88 }, { "epoch": 1.4652128764278296, "grad_norm": 0.11683235317468643, "learning_rate": 4.932444364939205e-05, "loss": 0.3322, "num_input_tokens_seen": 6482728, "step": 89 }, { "epoch": 1.4818276220145379, "grad_norm": 0.11446017026901245, "learning_rate": 4.9309248009941914e-05, "loss": 0.3802, "num_input_tokens_seen": 6562104, "step": 90 }, { "epoch": 1.4984423676012462, "grad_norm": 0.10500892251729965, "learning_rate": 4.929388575762782e-05, "loss": 0.3371, "num_input_tokens_seen": 6656552, "step": 91 }, { "epoch": 1.5150571131879542, "grad_norm": 0.13279151916503906, "learning_rate": 4.9278356997740904e-05, "loss": 0.293, "num_input_tokens_seen": 6714184, "step": 92 }, { "epoch": 1.5316718587746625, "grad_norm": 0.107506163418293, "learning_rate": 4.9262661836713564e-05, "loss": 0.3127, "num_input_tokens_seen": 6793552, "step": 93 }, { "epoch": 1.5482866043613708, "grad_norm": 0.124021977186203, "learning_rate": 4.924680038211867e-05, "loss": 0.3263, "num_input_tokens_seen": 6865256, "step": 94 }, { "epoch": 1.5649013499480788, "grad_norm": 0.14172782003879547, "learning_rate": 4.9230772742668866e-05, "loss": 0.3204, "num_input_tokens_seen": 6931152, "step": 95 }, { "epoch": 1.5815160955347871, "grad_norm": 0.12229758501052856, "learning_rate": 4.9214579028215776e-05, "loss": 0.326, "num_input_tokens_seen": 6998408, "step": 96 }, { "epoch": 1.5981308411214954, "grad_norm": 0.1242135688662529, "learning_rate": 4.919821934974933e-05, "loss": 0.2814, "num_input_tokens_seen": 7053008, "step": 97 }, { "epoch": 1.6147455867082035, "grad_norm": 0.12830108404159546, "learning_rate": 4.918169381939692e-05, "loss": 0.3254, "num_input_tokens_seen": 7106440, "step": 98 }, { "epoch": 1.6313603322949117, "grad_norm": 0.12180659174919128, "learning_rate": 4.916500255042268e-05, "loss": 0.3228, "num_input_tokens_seen": 7167032, "step": 99 }, { "epoch": 1.64797507788162, "grad_norm": 0.10792312026023865, "learning_rate": 4.914814565722671e-05, "loss": 0.2729, "num_input_tokens_seen": 7245720, "step": 100 }, { "epoch": 1.664589823468328, "grad_norm": 0.18523500859737396, "learning_rate": 4.913112325534426e-05, "loss": 0.3462, "num_input_tokens_seen": 7326320, "step": 101 }, { "epoch": 1.6812045690550363, "grad_norm": 0.09529964625835419, "learning_rate": 4.9113935461444955e-05, "loss": 0.3096, "num_input_tokens_seen": 7442232, "step": 102 }, { "epoch": 1.6978193146417446, "grad_norm": 0.14481183886528015, "learning_rate": 4.9096582393332025e-05, "loss": 0.3014, "num_input_tokens_seen": 7502496, "step": 103 }, { "epoch": 1.7144340602284527, "grad_norm": 0.14645016193389893, "learning_rate": 4.907906416994146e-05, "loss": 0.3336, "num_input_tokens_seen": 7566496, "step": 104 }, { "epoch": 1.731048805815161, "grad_norm": 0.1306885927915573, "learning_rate": 4.906138091134118e-05, "loss": 0.3911, "num_input_tokens_seen": 7629056, "step": 105 }, { "epoch": 1.7476635514018692, "grad_norm": 0.10863160341978073, "learning_rate": 4.9043532738730284e-05, "loss": 0.3201, "num_input_tokens_seen": 7706096, "step": 106 }, { "epoch": 1.7642782969885773, "grad_norm": 0.11725673079490662, "learning_rate": 4.9025519774438136e-05, "loss": 0.2783, "num_input_tokens_seen": 7780072, "step": 107 }, { "epoch": 1.7808930425752856, "grad_norm": 0.1243867501616478, "learning_rate": 4.900734214192358e-05, "loss": 0.3044, "num_input_tokens_seen": 7857712, "step": 108 }, { "epoch": 1.7975077881619939, "grad_norm": 0.13539955019950867, "learning_rate": 4.898899996577407e-05, "loss": 0.3009, "num_input_tokens_seen": 7916832, "step": 109 }, { "epoch": 1.814122533748702, "grad_norm": 0.11198178678750992, "learning_rate": 4.8970493371704826e-05, "loss": 0.3229, "num_input_tokens_seen": 7993056, "step": 110 }, { "epoch": 1.8307372793354102, "grad_norm": 0.11881165206432343, "learning_rate": 4.8951822486557986e-05, "loss": 0.3414, "num_input_tokens_seen": 8090056, "step": 111 }, { "epoch": 1.8473520249221185, "grad_norm": 0.12841404974460602, "learning_rate": 4.893298743830168e-05, "loss": 0.2907, "num_input_tokens_seen": 8164808, "step": 112 }, { "epoch": 1.8639667705088265, "grad_norm": 0.14767521619796753, "learning_rate": 4.891398835602925e-05, "loss": 0.2901, "num_input_tokens_seen": 8223568, "step": 113 }, { "epoch": 1.8805815160955348, "grad_norm": 0.15326914191246033, "learning_rate": 4.8894825369958255e-05, "loss": 0.2918, "num_input_tokens_seen": 8276160, "step": 114 }, { "epoch": 1.897196261682243, "grad_norm": 0.1210051029920578, "learning_rate": 4.8875498611429674e-05, "loss": 0.3074, "num_input_tokens_seen": 8354904, "step": 115 }, { "epoch": 1.9138110072689511, "grad_norm": 0.13544373214244843, "learning_rate": 4.8856008212906925e-05, "loss": 0.3461, "num_input_tokens_seen": 8442584, "step": 116 }, { "epoch": 1.9304257528556594, "grad_norm": 0.13535892963409424, "learning_rate": 4.8836354307975026e-05, "loss": 0.3078, "num_input_tokens_seen": 8506688, "step": 117 }, { "epoch": 1.9470404984423677, "grad_norm": 0.10383590310811996, "learning_rate": 4.881653703133966e-05, "loss": 0.2432, "num_input_tokens_seen": 8610712, "step": 118 }, { "epoch": 1.9636552440290758, "grad_norm": 0.12125886976718903, "learning_rate": 4.87965565188262e-05, "loss": 0.2915, "num_input_tokens_seen": 8692624, "step": 119 }, { "epoch": 1.980269989615784, "grad_norm": 0.1351424902677536, "learning_rate": 4.877641290737884e-05, "loss": 0.3006, "num_input_tokens_seen": 8772208, "step": 120 } ], "logging_steps": 1.0, "max_steps": 1200, "num_input_tokens_seen": 8772208, "num_train_epochs": 20, "save_steps": 60, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.742744632748933e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }