{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.3333333333333333, "eval_steps": 500, "global_step": 157, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0021231422505307855, "grad_norm": 6.233692311689662, "learning_rate": 0.0, "loss": 1.3677, "step": 1 }, { "epoch": 0.004246284501061571, "grad_norm": 6.03613913611666, "learning_rate": 2.0833333333333333e-07, "loss": 1.4092, "step": 2 }, { "epoch": 0.006369426751592357, "grad_norm": 5.843878249582451, "learning_rate": 4.1666666666666667e-07, "loss": 1.4005, "step": 3 }, { "epoch": 0.008492569002123142, "grad_norm": 6.2100854445399305, "learning_rate": 6.25e-07, "loss": 1.4027, "step": 4 }, { "epoch": 0.010615711252653927, "grad_norm": 5.700197750457959, "learning_rate": 8.333333333333333e-07, "loss": 1.3859, "step": 5 }, { "epoch": 0.012738853503184714, "grad_norm": 5.708735743112818, "learning_rate": 1.0416666666666667e-06, "loss": 1.4135, "step": 6 }, { "epoch": 0.014861995753715499, "grad_norm": 5.584947016091844, "learning_rate": 1.25e-06, "loss": 1.3083, "step": 7 }, { "epoch": 0.016985138004246284, "grad_norm": 5.5017514099891125, "learning_rate": 1.4583333333333335e-06, "loss": 1.3761, "step": 8 }, { "epoch": 0.01910828025477707, "grad_norm": 5.240707118749176, "learning_rate": 1.6666666666666667e-06, "loss": 1.374, "step": 9 }, { "epoch": 0.021231422505307854, "grad_norm": 5.159923266283754, "learning_rate": 1.8750000000000003e-06, "loss": 1.3837, "step": 10 }, { "epoch": 0.02335456475583864, "grad_norm": 4.23439034643152, "learning_rate": 2.0833333333333334e-06, "loss": 1.3278, "step": 11 }, { "epoch": 0.025477707006369428, "grad_norm": 4.23403012426509, "learning_rate": 2.2916666666666666e-06, "loss": 1.3095, "step": 12 }, { "epoch": 0.027600849256900213, "grad_norm": 4.033469595815775, "learning_rate": 2.5e-06, "loss": 1.3736, "step": 13 }, { "epoch": 0.029723991507430998, "grad_norm": 3.4140671410294092, "learning_rate": 2.7083333333333334e-06, "loss": 1.2966, "step": 14 }, { "epoch": 0.03184713375796178, "grad_norm": 3.2252832240083946, "learning_rate": 2.916666666666667e-06, "loss": 1.3211, "step": 15 }, { "epoch": 0.03397027600849257, "grad_norm": 2.958759736658132, "learning_rate": 3.125e-06, "loss": 1.284, "step": 16 }, { "epoch": 0.036093418259023353, "grad_norm": 3.0082219342870764, "learning_rate": 3.3333333333333333e-06, "loss": 1.2574, "step": 17 }, { "epoch": 0.03821656050955414, "grad_norm": 2.7607212501829186, "learning_rate": 3.5416666666666673e-06, "loss": 1.2222, "step": 18 }, { "epoch": 0.040339702760084924, "grad_norm": 1.7794382970826705, "learning_rate": 3.7500000000000005e-06, "loss": 1.1952, "step": 19 }, { "epoch": 0.04246284501061571, "grad_norm": 1.9000490101749923, "learning_rate": 3.958333333333333e-06, "loss": 1.0968, "step": 20 }, { "epoch": 0.044585987261146494, "grad_norm": 2.184749467485857, "learning_rate": 4.166666666666667e-06, "loss": 1.2058, "step": 21 }, { "epoch": 0.04670912951167728, "grad_norm": 2.076583497979509, "learning_rate": 4.3750000000000005e-06, "loss": 1.1726, "step": 22 }, { "epoch": 0.04883227176220807, "grad_norm": 1.6762408968746727, "learning_rate": 4.583333333333333e-06, "loss": 1.0816, "step": 23 }, { "epoch": 0.050955414012738856, "grad_norm": 1.655763485736625, "learning_rate": 4.791666666666668e-06, "loss": 1.0881, "step": 24 }, { "epoch": 0.05307855626326964, "grad_norm": 1.404756822747019, "learning_rate": 5e-06, "loss": 1.111, "step": 25 }, { "epoch": 0.055201698513800426, "grad_norm": 1.7450925608231072, "learning_rate": 4.999938256261134e-06, "loss": 1.0968, "step": 26 }, { "epoch": 0.05732484076433121, "grad_norm": 1.7455723981125268, "learning_rate": 4.999753028094368e-06, "loss": 1.135, "step": 27 }, { "epoch": 0.059447983014861996, "grad_norm": 1.765180118972071, "learning_rate": 4.999444324649045e-06, "loss": 1.1332, "step": 28 }, { "epoch": 0.06157112526539278, "grad_norm": 1.7025986108773807, "learning_rate": 4.99901216117357e-06, "loss": 1.1037, "step": 29 }, { "epoch": 0.06369426751592357, "grad_norm": 1.6843781660517996, "learning_rate": 4.998456559014653e-06, "loss": 1.1036, "step": 30 }, { "epoch": 0.06581740976645435, "grad_norm": 1.4971568544476521, "learning_rate": 4.997777545616258e-06, "loss": 1.0896, "step": 31 }, { "epoch": 0.06794055201698514, "grad_norm": 1.2471663073080574, "learning_rate": 4.996975154518245e-06, "loss": 1.0816, "step": 32 }, { "epoch": 0.07006369426751592, "grad_norm": 1.1099200791707107, "learning_rate": 4.996049425354717e-06, "loss": 1.0408, "step": 33 }, { "epoch": 0.07218683651804671, "grad_norm": 1.1596577803643031, "learning_rate": 4.995000403852057e-06, "loss": 1.0348, "step": 34 }, { "epoch": 0.07430997876857749, "grad_norm": 0.9836703479205239, "learning_rate": 4.993828141826672e-06, "loss": 1.0397, "step": 35 }, { "epoch": 0.07643312101910828, "grad_norm": 4.973840678082251, "learning_rate": 4.992532697182434e-06, "loss": 1.0692, "step": 36 }, { "epoch": 0.07855626326963906, "grad_norm": 0.9322137357537782, "learning_rate": 4.991114133907822e-06, "loss": 1.0056, "step": 37 }, { "epoch": 0.08067940552016985, "grad_norm": 1.0203537152829516, "learning_rate": 4.989572522072753e-06, "loss": 0.9956, "step": 38 }, { "epoch": 0.08280254777070063, "grad_norm": 1.0385035368583873, "learning_rate": 4.9879079378251325e-06, "loss": 0.9772, "step": 39 }, { "epoch": 0.08492569002123142, "grad_norm": 0.9097103872410344, "learning_rate": 4.986120463387084e-06, "loss": 1.0256, "step": 40 }, { "epoch": 0.0870488322717622, "grad_norm": 1.0611463152500384, "learning_rate": 4.984210187050891e-06, "loss": 0.9883, "step": 41 }, { "epoch": 0.08917197452229299, "grad_norm": 0.779249244150645, "learning_rate": 4.982177203174636e-06, "loss": 0.9847, "step": 42 }, { "epoch": 0.09129511677282377, "grad_norm": 0.7658008659883724, "learning_rate": 4.9800216121775404e-06, "loss": 0.9829, "step": 43 }, { "epoch": 0.09341825902335456, "grad_norm": 0.7598772913633557, "learning_rate": 4.977743520535001e-06, "loss": 1.0496, "step": 44 }, { "epoch": 0.09554140127388536, "grad_norm": 0.8141928977386432, "learning_rate": 4.975343040773335e-06, "loss": 1.0519, "step": 45 }, { "epoch": 0.09766454352441614, "grad_norm": 0.8911722172701033, "learning_rate": 4.972820291464219e-06, "loss": 1.0345, "step": 46 }, { "epoch": 0.09978768577494693, "grad_norm": 0.8358128773624875, "learning_rate": 4.970175397218832e-06, "loss": 0.9987, "step": 47 }, { "epoch": 0.10191082802547771, "grad_norm": 0.882046205562171, "learning_rate": 4.967408488681702e-06, "loss": 1.0004, "step": 48 }, { "epoch": 0.1040339702760085, "grad_norm": 0.8308643665449907, "learning_rate": 4.964519702524251e-06, "loss": 0.9867, "step": 49 }, { "epoch": 0.10615711252653928, "grad_norm": 0.8727814847927396, "learning_rate": 4.9615091814380465e-06, "loss": 1.0236, "step": 50 }, { "epoch": 0.10828025477707007, "grad_norm": 0.8128090079858757, "learning_rate": 4.958377074127751e-06, "loss": 1.0096, "step": 51 }, { "epoch": 0.11040339702760085, "grad_norm": 0.7041201796080805, "learning_rate": 4.955123535303775e-06, "loss": 1.0161, "step": 52 }, { "epoch": 0.11252653927813164, "grad_norm": 0.7034976999278052, "learning_rate": 4.951748725674643e-06, "loss": 0.9444, "step": 53 }, { "epoch": 0.11464968152866242, "grad_norm": 0.7111287475587301, "learning_rate": 4.948252811939044e-06, "loss": 1.0034, "step": 54 }, { "epoch": 0.11677282377919321, "grad_norm": 0.7112458705577068, "learning_rate": 4.944635966777607e-06, "loss": 1.0413, "step": 55 }, { "epoch": 0.11889596602972399, "grad_norm": 0.7351439597774859, "learning_rate": 4.940898368844366e-06, "loss": 1.0663, "step": 56 }, { "epoch": 0.12101910828025478, "grad_norm": 0.7120598412888705, "learning_rate": 4.937040202757937e-06, "loss": 1.0058, "step": 57 }, { "epoch": 0.12314225053078556, "grad_norm": 0.8009131327695405, "learning_rate": 4.933061659092401e-06, "loss": 1.0048, "step": 58 }, { "epoch": 0.12526539278131635, "grad_norm": 0.8355353984744548, "learning_rate": 4.928962934367887e-06, "loss": 1.0005, "step": 59 }, { "epoch": 0.12738853503184713, "grad_norm": 0.7292500219199575, "learning_rate": 4.924744231040865e-06, "loss": 0.9948, "step": 60 }, { "epoch": 0.12951167728237792, "grad_norm": 0.7441720822069011, "learning_rate": 4.920405757494147e-06, "loss": 0.9203, "step": 61 }, { "epoch": 0.1316348195329087, "grad_norm": 0.7164757410132886, "learning_rate": 4.915947728026599e-06, "loss": 0.9826, "step": 62 }, { "epoch": 0.1337579617834395, "grad_norm": 0.6688811212861865, "learning_rate": 4.911370362842544e-06, "loss": 1.0041, "step": 63 }, { "epoch": 0.13588110403397027, "grad_norm": 0.7276933998413905, "learning_rate": 4.906673888040895e-06, "loss": 1.0254, "step": 64 }, { "epoch": 0.13800424628450106, "grad_norm": 0.8980292377631762, "learning_rate": 4.901858535603982e-06, "loss": 1.0341, "step": 65 }, { "epoch": 0.14012738853503184, "grad_norm": 0.7213457650159387, "learning_rate": 4.896924543386099e-06, "loss": 0.9732, "step": 66 }, { "epoch": 0.14225053078556263, "grad_norm": 0.6575608860005288, "learning_rate": 4.891872155101746e-06, "loss": 0.9758, "step": 67 }, { "epoch": 0.14437367303609341, "grad_norm": 0.7227626661588851, "learning_rate": 4.886701620313595e-06, "loss": 0.9736, "step": 68 }, { "epoch": 0.1464968152866242, "grad_norm": 0.7820534177881011, "learning_rate": 4.88141319442017e-06, "loss": 0.9519, "step": 69 }, { "epoch": 0.14861995753715498, "grad_norm": 0.7464763101124039, "learning_rate": 4.876007138643216e-06, "loss": 1.0623, "step": 70 }, { "epoch": 0.15074309978768577, "grad_norm": 0.8031125528787094, "learning_rate": 4.870483720014814e-06, "loss": 1.0023, "step": 71 }, { "epoch": 0.15286624203821655, "grad_norm": 0.7252756266115659, "learning_rate": 4.8648432113641765e-06, "loss": 1.025, "step": 72 }, { "epoch": 0.15498938428874734, "grad_norm": 0.7685738411441779, "learning_rate": 4.8590858913041775e-06, "loss": 0.9701, "step": 73 }, { "epoch": 0.15711252653927812, "grad_norm": 0.7119059019537681, "learning_rate": 4.853212044217591e-06, "loss": 0.9471, "step": 74 }, { "epoch": 0.1592356687898089, "grad_norm": 0.670302973142834, "learning_rate": 4.847221960243042e-06, "loss": 0.9918, "step": 75 }, { "epoch": 0.1613588110403397, "grad_norm": 0.7274878719799441, "learning_rate": 4.8411159352606735e-06, "loss": 0.9114, "step": 76 }, { "epoch": 0.16348195329087048, "grad_norm": 0.6706478978040786, "learning_rate": 4.834894270877536e-06, "loss": 0.9628, "step": 77 }, { "epoch": 0.16560509554140126, "grad_norm": 0.7197495557043765, "learning_rate": 4.828557274412686e-06, "loss": 0.9792, "step": 78 }, { "epoch": 0.16772823779193205, "grad_norm": 0.6740897649139851, "learning_rate": 4.822105258882007e-06, "loss": 0.978, "step": 79 }, { "epoch": 0.16985138004246284, "grad_norm": 0.7298858435711825, "learning_rate": 4.815538542982751e-06, "loss": 1.0749, "step": 80 }, { "epoch": 0.17197452229299362, "grad_norm": 0.7437812096942298, "learning_rate": 4.808857451077788e-06, "loss": 1.0027, "step": 81 }, { "epoch": 0.1740976645435244, "grad_norm": 0.7793372891907893, "learning_rate": 4.802062313179595e-06, "loss": 1.056, "step": 82 }, { "epoch": 0.1762208067940552, "grad_norm": 0.7200522013491976, "learning_rate": 4.795153464933948e-06, "loss": 0.9804, "step": 83 }, { "epoch": 0.17834394904458598, "grad_norm": 0.6800845586517711, "learning_rate": 4.7881312476033444e-06, "loss": 1.0096, "step": 84 }, { "epoch": 0.18046709129511676, "grad_norm": 0.6876028206701021, "learning_rate": 4.7809960080501464e-06, "loss": 0.9886, "step": 85 }, { "epoch": 0.18259023354564755, "grad_norm": 0.7119685223963281, "learning_rate": 4.773748098719448e-06, "loss": 1.019, "step": 86 }, { "epoch": 0.18471337579617833, "grad_norm": 0.6948951583741163, "learning_rate": 4.766387877621667e-06, "loss": 1.0177, "step": 87 }, { "epoch": 0.18683651804670912, "grad_norm": 0.7252384478947084, "learning_rate": 4.758915708314858e-06, "loss": 0.952, "step": 88 }, { "epoch": 0.18895966029723993, "grad_norm": 0.7132363877921689, "learning_rate": 4.751331959886758e-06, "loss": 0.9311, "step": 89 }, { "epoch": 0.1910828025477707, "grad_norm": 0.6672381978657935, "learning_rate": 4.743637006936552e-06, "loss": 1.0019, "step": 90 }, { "epoch": 0.1932059447983015, "grad_norm": 0.7074654491248366, "learning_rate": 4.735831229556374e-06, "loss": 0.9249, "step": 91 }, { "epoch": 0.19532908704883228, "grad_norm": 0.8263034494745758, "learning_rate": 4.727915013312527e-06, "loss": 0.9627, "step": 92 }, { "epoch": 0.19745222929936307, "grad_norm": 0.7564665006141713, "learning_rate": 4.719888749226442e-06, "loss": 0.9788, "step": 93 }, { "epoch": 0.19957537154989385, "grad_norm": 0.7401712745437141, "learning_rate": 4.711752833755362e-06, "loss": 0.9595, "step": 94 }, { "epoch": 0.20169851380042464, "grad_norm": 0.7177547311512174, "learning_rate": 4.70350766877276e-06, "loss": 1.0125, "step": 95 }, { "epoch": 0.20382165605095542, "grad_norm": 0.8365049197183004, "learning_rate": 4.695153661548486e-06, "loss": 0.9617, "step": 96 }, { "epoch": 0.2059447983014862, "grad_norm": 0.8275524843143772, "learning_rate": 4.686691224728652e-06, "loss": 0.9583, "step": 97 }, { "epoch": 0.208067940552017, "grad_norm": 0.7849897218743559, "learning_rate": 4.678120776315251e-06, "loss": 0.9432, "step": 98 }, { "epoch": 0.21019108280254778, "grad_norm": 0.7266338220692218, "learning_rate": 4.669442739645506e-06, "loss": 1.0112, "step": 99 }, { "epoch": 0.21231422505307856, "grad_norm": 0.6786209555674868, "learning_rate": 4.660657543370958e-06, "loss": 1.0122, "step": 100 }, { "epoch": 0.21443736730360935, "grad_norm": 0.7396143819036078, "learning_rate": 4.651765621436303e-06, "loss": 1.0491, "step": 101 }, { "epoch": 0.21656050955414013, "grad_norm": 0.745708791916274, "learning_rate": 4.642767413057942e-06, "loss": 0.966, "step": 102 }, { "epoch": 0.21868365180467092, "grad_norm": 0.7477607378275345, "learning_rate": 4.6336633627023e-06, "loss": 0.9129, "step": 103 }, { "epoch": 0.2208067940552017, "grad_norm": 0.6930283267824098, "learning_rate": 4.624453920063863e-06, "loss": 0.9769, "step": 104 }, { "epoch": 0.2229299363057325, "grad_norm": 0.6762283490594463, "learning_rate": 4.6151395400429665e-06, "loss": 0.9383, "step": 105 }, { "epoch": 0.22505307855626328, "grad_norm": 0.7313509583041526, "learning_rate": 4.605720682723331e-06, "loss": 0.9428, "step": 106 }, { "epoch": 0.22717622080679406, "grad_norm": 0.7156367191749474, "learning_rate": 4.596197813349328e-06, "loss": 0.9834, "step": 107 }, { "epoch": 0.22929936305732485, "grad_norm": 0.6828464400232375, "learning_rate": 4.586571402303006e-06, "loss": 0.9504, "step": 108 }, { "epoch": 0.23142250530785563, "grad_norm": 0.74716164026175, "learning_rate": 4.576841925080853e-06, "loss": 0.9998, "step": 109 }, { "epoch": 0.23354564755838642, "grad_norm": 0.6856088912154495, "learning_rate": 4.56700986227031e-06, "loss": 1.0149, "step": 110 }, { "epoch": 0.2356687898089172, "grad_norm": 0.7033606189520333, "learning_rate": 4.557075699526032e-06, "loss": 0.9584, "step": 111 }, { "epoch": 0.23779193205944799, "grad_norm": 0.7043436392355145, "learning_rate": 4.547039927545899e-06, "loss": 0.9779, "step": 112 }, { "epoch": 0.23991507430997877, "grad_norm": 0.8202453116913221, "learning_rate": 4.536903042046778e-06, "loss": 0.9505, "step": 113 }, { "epoch": 0.24203821656050956, "grad_norm": 0.7436505838525592, "learning_rate": 4.526665543740038e-06, "loss": 0.9561, "step": 114 }, { "epoch": 0.24416135881104034, "grad_norm": 0.6478361281734111, "learning_rate": 4.516327938306818e-06, "loss": 1.0084, "step": 115 }, { "epoch": 0.24628450106157113, "grad_norm": 0.7674555846640165, "learning_rate": 4.505890736373045e-06, "loss": 0.9767, "step": 116 }, { "epoch": 0.2484076433121019, "grad_norm": 0.7184796384578326, "learning_rate": 4.495354453484216e-06, "loss": 0.9801, "step": 117 }, { "epoch": 0.2505307855626327, "grad_norm": 0.677609956598202, "learning_rate": 4.4847196100799305e-06, "loss": 0.9825, "step": 118 }, { "epoch": 0.2526539278131635, "grad_norm": 0.7123651122668029, "learning_rate": 4.473986731468183e-06, "loss": 1.0107, "step": 119 }, { "epoch": 0.25477707006369427, "grad_norm": 0.7156231222559138, "learning_rate": 4.463156347799419e-06, "loss": 1.0779, "step": 120 }, { "epoch": 0.25690021231422505, "grad_norm": 0.7717775840138449, "learning_rate": 4.452228994040341e-06, "loss": 0.9622, "step": 121 }, { "epoch": 0.25902335456475584, "grad_norm": 0.7512072014948016, "learning_rate": 4.4412052099474916e-06, "loss": 0.9625, "step": 122 }, { "epoch": 0.2611464968152866, "grad_norm": 0.7124270062225315, "learning_rate": 4.430085540040587e-06, "loss": 1.0169, "step": 123 }, { "epoch": 0.2632696390658174, "grad_norm": 0.7058601891740411, "learning_rate": 4.418870533575626e-06, "loss": 0.9819, "step": 124 }, { "epoch": 0.2653927813163482, "grad_norm": 0.6903185636770165, "learning_rate": 4.40756074451775e-06, "loss": 0.9398, "step": 125 }, { "epoch": 0.267515923566879, "grad_norm": 0.7046682682406727, "learning_rate": 4.396156731513888e-06, "loss": 1.0341, "step": 126 }, { "epoch": 0.26963906581740976, "grad_norm": 0.7862567299960644, "learning_rate": 4.384659057865165e-06, "loss": 0.947, "step": 127 }, { "epoch": 0.27176220806794055, "grad_norm": 0.6195482427854749, "learning_rate": 4.373068291499065e-06, "loss": 0.8899, "step": 128 }, { "epoch": 0.27388535031847133, "grad_norm": 0.6985398850512, "learning_rate": 4.36138500494139e-06, "loss": 0.9726, "step": 129 }, { "epoch": 0.2760084925690021, "grad_norm": 0.668756553679393, "learning_rate": 4.349609775287977e-06, "loss": 0.9855, "step": 130 }, { "epoch": 0.2781316348195329, "grad_norm": 0.6581795928554366, "learning_rate": 4.337743184176188e-06, "loss": 0.9596, "step": 131 }, { "epoch": 0.2802547770700637, "grad_norm": 0.6337202167292365, "learning_rate": 4.325785817756186e-06, "loss": 0.9839, "step": 132 }, { "epoch": 0.2823779193205945, "grad_norm": 0.6680059124711757, "learning_rate": 4.313738266661979e-06, "loss": 0.9324, "step": 133 }, { "epoch": 0.28450106157112526, "grad_norm": 0.6598271036519225, "learning_rate": 4.301601125982246e-06, "loss": 1.0066, "step": 134 }, { "epoch": 0.28662420382165604, "grad_norm": 0.7183648150650218, "learning_rate": 4.289374995230942e-06, "loss": 1.0136, "step": 135 }, { "epoch": 0.28874734607218683, "grad_norm": 0.742221341756798, "learning_rate": 4.277060478317687e-06, "loss": 1.0008, "step": 136 }, { "epoch": 0.2908704883227176, "grad_norm": 0.6824304036868553, "learning_rate": 4.264658183517935e-06, "loss": 0.9381, "step": 137 }, { "epoch": 0.2929936305732484, "grad_norm": 0.6505573173225403, "learning_rate": 4.252168723442927e-06, "loss": 0.9489, "step": 138 }, { "epoch": 0.2951167728237792, "grad_norm": 0.6950634835305272, "learning_rate": 4.23959271500943e-06, "loss": 0.9373, "step": 139 }, { "epoch": 0.29723991507430997, "grad_norm": 0.6872675526003413, "learning_rate": 4.226930779409271e-06, "loss": 0.9984, "step": 140 }, { "epoch": 0.29936305732484075, "grad_norm": 0.7693317009374261, "learning_rate": 4.214183542078646e-06, "loss": 0.9391, "step": 141 }, { "epoch": 0.30148619957537154, "grad_norm": 0.6809609221970698, "learning_rate": 4.201351632667227e-06, "loss": 1.0107, "step": 142 }, { "epoch": 0.3036093418259023, "grad_norm": 0.6776804857260985, "learning_rate": 4.1884356850070695e-06, "loss": 1.0316, "step": 143 }, { "epoch": 0.3057324840764331, "grad_norm": 0.6858474793512133, "learning_rate": 4.175436337081289e-06, "loss": 0.9071, "step": 144 }, { "epoch": 0.3078556263269639, "grad_norm": 0.7262200113022307, "learning_rate": 4.162354230992562e-06, "loss": 0.9751, "step": 145 }, { "epoch": 0.3099787685774947, "grad_norm": 0.7048851306107318, "learning_rate": 4.149190012931402e-06, "loss": 0.9222, "step": 146 }, { "epoch": 0.31210191082802546, "grad_norm": 0.7661302534267068, "learning_rate": 4.135944333144244e-06, "loss": 0.9493, "step": 147 }, { "epoch": 0.31422505307855625, "grad_norm": 0.6647279751401796, "learning_rate": 4.122617845901322e-06, "loss": 1.0373, "step": 148 }, { "epoch": 0.31634819532908703, "grad_norm": 0.677884233878186, "learning_rate": 4.109211209464354e-06, "loss": 1.0299, "step": 149 }, { "epoch": 0.3184713375796178, "grad_norm": 0.6808985503614562, "learning_rate": 4.095725086054029e-06, "loss": 0.9564, "step": 150 }, { "epoch": 0.3205944798301486, "grad_norm": 0.6844572260666089, "learning_rate": 4.0821601418172926e-06, "loss": 0.9942, "step": 151 }, { "epoch": 0.3227176220806794, "grad_norm": 0.687636477644006, "learning_rate": 4.068517046794443e-06, "loss": 0.9925, "step": 152 }, { "epoch": 0.3248407643312102, "grad_norm": 0.7121462034889847, "learning_rate": 4.054796474886038e-06, "loss": 1.0232, "step": 153 }, { "epoch": 0.32696390658174096, "grad_norm": 0.6791139957154484, "learning_rate": 4.040999103819606e-06, "loss": 1.0147, "step": 154 }, { "epoch": 0.32908704883227174, "grad_norm": 0.6317316423507299, "learning_rate": 4.0271256151161666e-06, "loss": 1.0073, "step": 155 }, { "epoch": 0.33121019108280253, "grad_norm": 0.7021672955838567, "learning_rate": 4.013176694056571e-06, "loss": 0.9892, "step": 156 }, { "epoch": 0.3333333333333333, "grad_norm": 0.6535581508427883, "learning_rate": 3.999153029647651e-06, "loss": 1.0406, "step": 157 } ], "logging_steps": 1, "max_steps": 471, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 157, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 23254277554176.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }