{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 0,
  "global_step": 351,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.002849002849002849,
      "grad_norm": 1.4838141202926636,
      "learning_rate": 1e-05,
      "loss": 2.366,
      "step": 1
    },
    {
      "epoch": 0.005698005698005698,
      "grad_norm": 1.4262256622314453,
      "learning_rate": 9.971509971509972e-06,
      "loss": 2.4139,
      "step": 2
    },
    {
      "epoch": 0.008547008547008548,
      "grad_norm": 1.3603845834732056,
      "learning_rate": 9.943019943019944e-06,
      "loss": 2.2811,
      "step": 3
    },
    {
      "epoch": 0.011396011396011397,
      "grad_norm": 1.307405710220337,
      "learning_rate": 9.914529914529915e-06,
      "loss": 2.2546,
      "step": 4
    },
    {
      "epoch": 0.014245014245014245,
      "grad_norm": 1.2675021886825562,
      "learning_rate": 9.886039886039887e-06,
      "loss": 2.2322,
      "step": 5
    },
    {
      "epoch": 0.017094017094017096,
      "grad_norm": 1.2081060409545898,
      "learning_rate": 9.857549857549858e-06,
      "loss": 2.2255,
      "step": 6
    },
    {
      "epoch": 0.019943019943019943,
      "grad_norm": 1.104535698890686,
      "learning_rate": 9.82905982905983e-06,
      "loss": 2.2139,
      "step": 7
    },
    {
      "epoch": 0.022792022792022793,
      "grad_norm": 1.0799970626831055,
      "learning_rate": 9.800569800569801e-06,
      "loss": 2.2049,
      "step": 8
    },
    {
      "epoch": 0.02564102564102564,
      "grad_norm": 0.9630372524261475,
      "learning_rate": 9.772079772079773e-06,
      "loss": 2.1045,
      "step": 9
    },
    {
      "epoch": 0.02849002849002849,
      "grad_norm": 0.9740710854530334,
      "learning_rate": 9.743589743589744e-06,
      "loss": 2.1669,
      "step": 10
    },
    {
      "epoch": 0.03133903133903134,
      "grad_norm": 0.9871430397033691,
      "learning_rate": 9.715099715099716e-06,
      "loss": 2.1666,
      "step": 11
    },
    {
      "epoch": 0.03418803418803419,
      "grad_norm": 0.9979017376899719,
      "learning_rate": 9.686609686609687e-06,
      "loss": 2.2347,
      "step": 12
    },
    {
      "epoch": 0.037037037037037035,
      "grad_norm": 0.921946108341217,
      "learning_rate": 9.658119658119659e-06,
      "loss": 2.1309,
      "step": 13
    },
    {
      "epoch": 0.039886039886039885,
      "grad_norm": 0.9126842617988586,
      "learning_rate": 9.62962962962963e-06,
      "loss": 2.0519,
      "step": 14
    },
    {
      "epoch": 0.042735042735042736,
      "grad_norm": 0.8587276935577393,
      "learning_rate": 9.601139601139601e-06,
      "loss": 2.0816,
      "step": 15
    },
    {
      "epoch": 0.045584045584045586,
      "grad_norm": 0.8564528822898865,
      "learning_rate": 9.572649572649575e-06,
      "loss": 2.0918,
      "step": 16
    },
    {
      "epoch": 0.04843304843304843,
      "grad_norm": 0.8116742968559265,
      "learning_rate": 9.544159544159544e-06,
      "loss": 1.9883,
      "step": 17
    },
    {
      "epoch": 0.05128205128205128,
      "grad_norm": 0.7653638124465942,
      "learning_rate": 9.515669515669516e-06,
      "loss": 2.008,
      "step": 18
    },
    {
      "epoch": 0.05413105413105413,
      "grad_norm": 0.758541464805603,
      "learning_rate": 9.487179487179487e-06,
      "loss": 2.0232,
      "step": 19
    },
    {
      "epoch": 0.05698005698005698,
      "grad_norm": 0.7756889462471008,
      "learning_rate": 9.458689458689459e-06,
      "loss": 2.0479,
      "step": 20
    },
    {
      "epoch": 0.05982905982905983,
      "grad_norm": 0.8094788789749146,
      "learning_rate": 9.430199430199432e-06,
      "loss": 2.0904,
      "step": 21
    },
    {
      "epoch": 0.06267806267806268,
      "grad_norm": 0.6886956691741943,
      "learning_rate": 9.401709401709402e-06,
      "loss": 1.9566,
      "step": 22
    },
    {
      "epoch": 0.06552706552706553,
      "grad_norm": 0.6763948798179626,
      "learning_rate": 9.373219373219375e-06,
      "loss": 1.9283,
      "step": 23
    },
    {
      "epoch": 0.06837606837606838,
      "grad_norm": 0.6754049062728882,
      "learning_rate": 9.344729344729345e-06,
      "loss": 1.9353,
      "step": 24
    },
    {
      "epoch": 0.07122507122507123,
      "grad_norm": 0.6518625617027283,
      "learning_rate": 9.316239316239318e-06,
      "loss": 1.9165,
      "step": 25
    },
    {
      "epoch": 0.07407407407407407,
      "grad_norm": 0.6429179906845093,
      "learning_rate": 9.287749287749288e-06,
      "loss": 1.9261,
      "step": 26
    },
    {
      "epoch": 0.07692307692307693,
      "grad_norm": 0.6808933019638062,
      "learning_rate": 9.25925925925926e-06,
      "loss": 1.9784,
      "step": 27
    },
    {
      "epoch": 0.07977207977207977,
      "grad_norm": 0.7099093198776245,
      "learning_rate": 9.230769230769232e-06,
      "loss": 1.9884,
      "step": 28
    },
    {
      "epoch": 0.08262108262108261,
      "grad_norm": 0.6004197597503662,
      "learning_rate": 9.202279202279202e-06,
      "loss": 1.8912,
      "step": 29
    },
    {
      "epoch": 0.08547008547008547,
      "grad_norm": 0.5550093650817871,
      "learning_rate": 9.173789173789175e-06,
      "loss": 1.7948,
      "step": 30
    },
    {
      "epoch": 0.08831908831908832,
      "grad_norm": 0.6177744269371033,
      "learning_rate": 9.145299145299145e-06,
      "loss": 1.9004,
      "step": 31
    },
    {
      "epoch": 0.09116809116809117,
      "grad_norm": 0.5736203789710999,
      "learning_rate": 9.116809116809118e-06,
      "loss": 1.8579,
      "step": 32
    },
    {
      "epoch": 0.09401709401709402,
      "grad_norm": 0.5455344915390015,
      "learning_rate": 9.088319088319088e-06,
      "loss": 1.8232,
      "step": 33
    },
    {
      "epoch": 0.09686609686609686,
      "grad_norm": 0.5457695126533508,
      "learning_rate": 9.059829059829061e-06,
      "loss": 1.8387,
      "step": 34
    },
    {
      "epoch": 0.09971509971509972,
      "grad_norm": 0.6495256423950195,
      "learning_rate": 9.031339031339033e-06,
      "loss": 1.9007,
      "step": 35
    },
    {
      "epoch": 0.10256410256410256,
      "grad_norm": 0.553978443145752,
      "learning_rate": 9.002849002849004e-06,
      "loss": 1.7967,
      "step": 36
    },
    {
      "epoch": 0.10541310541310542,
      "grad_norm": 0.6648301482200623,
      "learning_rate": 8.974358974358976e-06,
      "loss": 1.8203,
      "step": 37
    },
    {
      "epoch": 0.10826210826210826,
      "grad_norm": 0.604141354560852,
      "learning_rate": 8.945868945868947e-06,
      "loss": 1.8569,
      "step": 38
    },
    {
      "epoch": 0.1111111111111111,
      "grad_norm": 0.5134737491607666,
      "learning_rate": 8.917378917378919e-06,
      "loss": 1.7601,
      "step": 39
    },
    {
      "epoch": 0.11396011396011396,
      "grad_norm": 0.5309232473373413,
      "learning_rate": 8.888888888888888e-06,
      "loss": 1.8382,
      "step": 40
    },
    {
      "epoch": 0.1168091168091168,
      "grad_norm": 0.5077832937240601,
      "learning_rate": 8.860398860398861e-06,
      "loss": 1.7595,
      "step": 41
    },
    {
      "epoch": 0.11965811965811966,
      "grad_norm": 0.511060357093811,
      "learning_rate": 8.831908831908833e-06,
      "loss": 1.7981,
      "step": 42
    },
    {
      "epoch": 0.1225071225071225,
      "grad_norm": 0.48027244210243225,
      "learning_rate": 8.803418803418804e-06,
      "loss": 1.726,
      "step": 43
    },
    {
      "epoch": 0.12535612535612536,
      "grad_norm": 0.4738457202911377,
      "learning_rate": 8.774928774928776e-06,
      "loss": 1.7552,
      "step": 44
    },
    {
      "epoch": 0.1282051282051282,
      "grad_norm": 0.4702482223510742,
      "learning_rate": 8.746438746438747e-06,
      "loss": 1.7324,
      "step": 45
    },
    {
      "epoch": 0.13105413105413105,
      "grad_norm": 0.48187750577926636,
      "learning_rate": 8.717948717948719e-06,
      "loss": 1.7393,
      "step": 46
    },
    {
      "epoch": 0.1339031339031339,
      "grad_norm": 0.46382951736450195,
      "learning_rate": 8.68945868945869e-06,
      "loss": 1.7103,
      "step": 47
    },
    {
      "epoch": 0.13675213675213677,
      "grad_norm": 0.5777999758720398,
      "learning_rate": 8.660968660968662e-06,
      "loss": 1.7991,
      "step": 48
    },
    {
      "epoch": 0.1396011396011396,
      "grad_norm": 0.46543341875076294,
      "learning_rate": 8.632478632478633e-06,
      "loss": 1.7483,
      "step": 49
    },
    {
      "epoch": 0.14245014245014245,
      "grad_norm": 0.5707411766052246,
      "learning_rate": 8.603988603988605e-06,
      "loss": 1.7243,
      "step": 50
    },
    {
      "epoch": 0.1452991452991453,
      "grad_norm": 0.5121602416038513,
      "learning_rate": 8.575498575498576e-06,
      "loss": 1.7487,
      "step": 51
    },
    {
      "epoch": 0.14814814814814814,
      "grad_norm": 0.45368504524230957,
      "learning_rate": 8.547008547008548e-06,
      "loss": 1.7515,
      "step": 52
    },
    {
      "epoch": 0.150997150997151,
      "grad_norm": 0.44115832448005676,
      "learning_rate": 8.518518518518519e-06,
      "loss": 1.7165,
      "step": 53
    },
    {
      "epoch": 0.15384615384615385,
      "grad_norm": 0.43293115496635437,
      "learning_rate": 8.49002849002849e-06,
      "loss": 1.7247,
      "step": 54
    },
    {
      "epoch": 0.15669515669515668,
      "grad_norm": 0.4369884431362152,
      "learning_rate": 8.461538461538462e-06,
      "loss": 1.7046,
      "step": 55
    },
    {
      "epoch": 0.15954415954415954,
      "grad_norm": 0.44155994057655334,
      "learning_rate": 8.433048433048434e-06,
      "loss": 1.7397,
      "step": 56
    },
    {
      "epoch": 0.1623931623931624,
      "grad_norm": 0.4158068895339966,
      "learning_rate": 8.404558404558405e-06,
      "loss": 1.7038,
      "step": 57
    },
    {
      "epoch": 0.16524216524216523,
      "grad_norm": 0.4057186245918274,
      "learning_rate": 8.376068376068377e-06,
      "loss": 1.6994,
      "step": 58
    },
    {
      "epoch": 0.16809116809116809,
      "grad_norm": 0.4907611906528473,
      "learning_rate": 8.347578347578348e-06,
      "loss": 1.7246,
      "step": 59
    },
    {
      "epoch": 0.17094017094017094,
      "grad_norm": 0.4695189595222473,
      "learning_rate": 8.31908831908832e-06,
      "loss": 1.7315,
      "step": 60
    },
    {
      "epoch": 0.1737891737891738,
      "grad_norm": 0.40382280945777893,
      "learning_rate": 8.290598290598293e-06,
      "loss": 1.6904,
      "step": 61
    },
    {
      "epoch": 0.17663817663817663,
      "grad_norm": 0.42537087202072144,
      "learning_rate": 8.262108262108262e-06,
      "loss": 1.7413,
      "step": 62
    },
    {
      "epoch": 0.1794871794871795,
      "grad_norm": 0.45500096678733826,
      "learning_rate": 8.233618233618234e-06,
      "loss": 1.687,
      "step": 63
    },
    {
      "epoch": 0.18233618233618235,
      "grad_norm": 0.5165032148361206,
      "learning_rate": 8.205128205128205e-06,
      "loss": 1.6565,
      "step": 64
    },
    {
      "epoch": 0.18518518518518517,
      "grad_norm": 0.4045052230358124,
      "learning_rate": 8.176638176638177e-06,
      "loss": 1.679,
      "step": 65
    },
    {
      "epoch": 0.18803418803418803,
      "grad_norm": 0.5608129501342773,
      "learning_rate": 8.148148148148148e-06,
      "loss": 1.6782,
      "step": 66
    },
    {
      "epoch": 0.1908831908831909,
      "grad_norm": 0.42527124285697937,
      "learning_rate": 8.11965811965812e-06,
      "loss": 1.6164,
      "step": 67
    },
    {
      "epoch": 0.19373219373219372,
      "grad_norm": 0.39863091707229614,
      "learning_rate": 8.091168091168093e-06,
      "loss": 1.6564,
      "step": 68
    },
    {
      "epoch": 0.19658119658119658,
      "grad_norm": 0.40516364574432373,
      "learning_rate": 8.062678062678063e-06,
      "loss": 1.5941,
      "step": 69
    },
    {
      "epoch": 0.19943019943019943,
      "grad_norm": 0.42938536405563354,
      "learning_rate": 8.034188034188036e-06,
      "loss": 1.6471,
      "step": 70
    },
    {
      "epoch": 0.2022792022792023,
      "grad_norm": 0.3754700720310211,
      "learning_rate": 8.005698005698006e-06,
      "loss": 1.637,
      "step": 71
    },
    {
      "epoch": 0.20512820512820512,
      "grad_norm": 0.4259706735610962,
      "learning_rate": 7.977207977207979e-06,
      "loss": 1.6063,
      "step": 72
    },
    {
      "epoch": 0.20797720797720798,
      "grad_norm": 0.41146427392959595,
      "learning_rate": 7.948717948717949e-06,
      "loss": 1.6559,
      "step": 73
    },
    {
      "epoch": 0.21082621082621084,
      "grad_norm": 0.3858882486820221,
      "learning_rate": 7.92022792022792e-06,
      "loss": 1.6355,
      "step": 74
    },
    {
      "epoch": 0.21367521367521367,
      "grad_norm": 0.46363890171051025,
      "learning_rate": 7.891737891737893e-06,
      "loss": 1.7049,
      "step": 75
    },
    {
      "epoch": 0.21652421652421652,
      "grad_norm": 0.40698277950286865,
      "learning_rate": 7.863247863247863e-06,
      "loss": 1.6436,
      "step": 76
    },
    {
      "epoch": 0.21937321937321938,
      "grad_norm": 0.3834919035434723,
      "learning_rate": 7.834757834757836e-06,
      "loss": 1.5902,
      "step": 77
    },
    {
      "epoch": 0.2222222222222222,
      "grad_norm": 0.3849916160106659,
      "learning_rate": 7.806267806267806e-06,
      "loss": 1.6127,
      "step": 78
    },
    {
      "epoch": 0.22507122507122507,
      "grad_norm": 0.6278889179229736,
      "learning_rate": 7.77777777777778e-06,
      "loss": 1.6641,
      "step": 79
    },
    {
      "epoch": 0.22792022792022792,
      "grad_norm": 0.4905427396297455,
      "learning_rate": 7.749287749287749e-06,
      "loss": 1.5825,
      "step": 80
    },
    {
      "epoch": 0.23076923076923078,
      "grad_norm": 0.4097338318824768,
      "learning_rate": 7.720797720797722e-06,
      "loss": 1.5964,
      "step": 81
    },
    {
      "epoch": 0.2336182336182336,
      "grad_norm": 0.3952528238296509,
      "learning_rate": 7.692307692307694e-06,
      "loss": 1.6584,
      "step": 82
    },
    {
      "epoch": 0.23646723646723647,
      "grad_norm": 0.38913223147392273,
      "learning_rate": 7.663817663817665e-06,
      "loss": 1.5694,
      "step": 83
    },
    {
      "epoch": 0.23931623931623933,
      "grad_norm": 0.391777902841568,
      "learning_rate": 7.635327635327637e-06,
      "loss": 1.6378,
      "step": 84
    },
    {
      "epoch": 0.24216524216524216,
      "grad_norm": 0.41954416036605835,
      "learning_rate": 7.606837606837607e-06,
      "loss": 1.6073,
      "step": 85
    },
    {
      "epoch": 0.245014245014245,
      "grad_norm": 0.3974544107913971,
      "learning_rate": 7.578347578347579e-06,
      "loss": 1.6005,
      "step": 86
    },
    {
      "epoch": 0.24786324786324787,
      "grad_norm": 0.43366730213165283,
      "learning_rate": 7.54985754985755e-06,
      "loss": 1.5905,
      "step": 87
    },
    {
      "epoch": 0.25071225071225073,
      "grad_norm": 0.37673377990722656,
      "learning_rate": 7.521367521367522e-06,
      "loss": 1.562,
      "step": 88
    },
    {
      "epoch": 0.2535612535612536,
      "grad_norm": 0.48865458369255066,
      "learning_rate": 7.492877492877494e-06,
      "loss": 1.5934,
      "step": 89
    },
    {
      "epoch": 0.2564102564102564,
      "grad_norm": 0.38269999623298645,
      "learning_rate": 7.4643874643874645e-06,
      "loss": 1.6024,
      "step": 90
    },
    {
      "epoch": 0.25925925925925924,
      "grad_norm": 0.40311211347579956,
      "learning_rate": 7.435897435897437e-06,
      "loss": 1.6263,
      "step": 91
    },
    {
      "epoch": 0.2621082621082621,
      "grad_norm": 0.3799367845058441,
      "learning_rate": 7.4074074074074075e-06,
      "loss": 1.599,
      "step": 92
    },
    {
      "epoch": 0.26495726495726496,
      "grad_norm": 0.39559420943260193,
      "learning_rate": 7.37891737891738e-06,
      "loss": 1.6103,
      "step": 93
    },
    {
      "epoch": 0.2678062678062678,
      "grad_norm": 0.37981730699539185,
      "learning_rate": 7.350427350427351e-06,
      "loss": 1.598,
      "step": 94
    },
    {
      "epoch": 0.2706552706552707,
      "grad_norm": 0.3881866931915283,
      "learning_rate": 7.321937321937323e-06,
      "loss": 1.5843,
      "step": 95
    },
    {
      "epoch": 0.27350427350427353,
      "grad_norm": 0.3740154504776001,
      "learning_rate": 7.293447293447294e-06,
      "loss": 1.6069,
      "step": 96
    },
    {
      "epoch": 0.27635327635327633,
      "grad_norm": 0.3980708718299866,
      "learning_rate": 7.264957264957266e-06,
      "loss": 1.5667,
      "step": 97
    },
    {
      "epoch": 0.2792022792022792,
      "grad_norm": 0.37536391615867615,
      "learning_rate": 7.236467236467237e-06,
      "loss": 1.5926,
      "step": 98
    },
    {
      "epoch": 0.28205128205128205,
      "grad_norm": 0.4172308146953583,
      "learning_rate": 7.207977207977208e-06,
      "loss": 1.5371,
      "step": 99
    },
    {
      "epoch": 0.2849002849002849,
      "grad_norm": 0.39715775847435,
      "learning_rate": 7.17948717948718e-06,
      "loss": 1.5931,
      "step": 100
    },
    {
      "epoch": 0.28774928774928776,
      "grad_norm": 0.4845562279224396,
      "learning_rate": 7.1509971509971524e-06,
      "loss": 1.6267,
      "step": 101
    },
    {
      "epoch": 0.2905982905982906,
      "grad_norm": 0.38772156834602356,
      "learning_rate": 7.122507122507123e-06,
      "loss": 1.5949,
      "step": 102
    },
    {
      "epoch": 0.2934472934472934,
      "grad_norm": 0.3815441429615021,
      "learning_rate": 7.0940170940170945e-06,
      "loss": 1.5758,
      "step": 103
    },
    {
      "epoch": 0.2962962962962963,
      "grad_norm": 0.4964717626571655,
      "learning_rate": 7.065527065527066e-06,
      "loss": 1.5653,
      "step": 104
    },
    {
      "epoch": 0.29914529914529914,
      "grad_norm": 0.378212571144104,
      "learning_rate": 7.0370370370370375e-06,
      "loss": 1.536,
      "step": 105
    },
    {
      "epoch": 0.301994301994302,
      "grad_norm": 0.36918291449546814,
      "learning_rate": 7.008547008547009e-06,
      "loss": 1.555,
      "step": 106
    },
    {
      "epoch": 0.30484330484330485,
      "grad_norm": 0.39171653985977173,
      "learning_rate": 6.9800569800569804e-06,
      "loss": 1.6057,
      "step": 107
    },
    {
      "epoch": 0.3076923076923077,
      "grad_norm": 0.5356259942054749,
      "learning_rate": 6.951566951566953e-06,
      "loss": 1.5825,
      "step": 108
    },
    {
      "epoch": 0.31054131054131057,
      "grad_norm": 0.40925300121307373,
      "learning_rate": 6.923076923076923e-06,
      "loss": 1.6084,
      "step": 109
    },
    {
      "epoch": 0.31339031339031337,
      "grad_norm": 0.3943912386894226,
      "learning_rate": 6.894586894586896e-06,
      "loss": 1.5231,
      "step": 110
    },
    {
      "epoch": 0.3162393162393162,
      "grad_norm": 0.40087035298347473,
      "learning_rate": 6.866096866096866e-06,
      "loss": 1.5833,
      "step": 111
    },
    {
      "epoch": 0.3190883190883191,
      "grad_norm": 0.3822116553783417,
      "learning_rate": 6.837606837606839e-06,
      "loss": 1.5477,
      "step": 112
    },
    {
      "epoch": 0.32193732193732194,
      "grad_norm": 0.39919513463974,
      "learning_rate": 6.809116809116809e-06,
      "loss": 1.555,
      "step": 113
    },
    {
      "epoch": 0.3247863247863248,
      "grad_norm": 0.39128148555755615,
      "learning_rate": 6.780626780626781e-06,
      "loss": 1.5886,
      "step": 114
    },
    {
      "epoch": 0.32763532763532766,
      "grad_norm": 0.3694957196712494,
      "learning_rate": 6.752136752136753e-06,
      "loss": 1.4937,
      "step": 115
    },
    {
      "epoch": 0.33048433048433046,
      "grad_norm": 0.4147852659225464,
      "learning_rate": 6.723646723646724e-06,
      "loss": 1.5697,
      "step": 116
    },
    {
      "epoch": 0.3333333333333333,
      "grad_norm": 0.4091155230998993,
      "learning_rate": 6.695156695156696e-06,
      "loss": 1.511,
      "step": 117
    },
    {
      "epoch": 0.33618233618233617,
      "grad_norm": 0.3905634582042694,
      "learning_rate": 6.666666666666667e-06,
      "loss": 1.5462,
      "step": 118
    },
    {
      "epoch": 0.33903133903133903,
      "grad_norm": 0.4323817491531372,
      "learning_rate": 6.638176638176639e-06,
      "loss": 1.5459,
      "step": 119
    },
    {
      "epoch": 0.3418803418803419,
      "grad_norm": 0.38668230175971985,
      "learning_rate": 6.60968660968661e-06,
      "loss": 1.5664,
      "step": 120
    },
    {
      "epoch": 0.34472934472934474,
      "grad_norm": 0.4649519622325897,
      "learning_rate": 6.581196581196582e-06,
      "loss": 1.5827,
      "step": 121
    },
    {
      "epoch": 0.3475783475783476,
      "grad_norm": 0.4004313051700592,
      "learning_rate": 6.552706552706553e-06,
      "loss": 1.4653,
      "step": 122
    },
    {
      "epoch": 0.3504273504273504,
      "grad_norm": 0.3949541449546814,
      "learning_rate": 6.524216524216525e-06,
      "loss": 1.5285,
      "step": 123
    },
    {
      "epoch": 0.35327635327635326,
      "grad_norm": 0.6077877283096313,
      "learning_rate": 6.495726495726496e-06,
      "loss": 1.5648,
      "step": 124
    },
    {
      "epoch": 0.3561253561253561,
      "grad_norm": 0.5344558358192444,
      "learning_rate": 6.467236467236467e-06,
      "loss": 1.5311,
      "step": 125
    },
    {
      "epoch": 0.358974358974359,
      "grad_norm": 0.38816729187965393,
      "learning_rate": 6.438746438746439e-06,
      "loss": 1.5139,
      "step": 126
    },
    {
      "epoch": 0.36182336182336183,
      "grad_norm": 0.3926841914653778,
      "learning_rate": 6.410256410256412e-06,
      "loss": 1.5277,
      "step": 127
    },
    {
      "epoch": 0.3646723646723647,
      "grad_norm": 0.40280261635780334,
      "learning_rate": 6.381766381766382e-06,
      "loss": 1.553,
      "step": 128
    },
    {
      "epoch": 0.36752136752136755,
      "grad_norm": 0.38559049367904663,
      "learning_rate": 6.3532763532763546e-06,
      "loss": 1.5269,
      "step": 129
    },
    {
      "epoch": 0.37037037037037035,
      "grad_norm": 0.38594579696655273,
      "learning_rate": 6.324786324786325e-06,
      "loss": 1.5185,
      "step": 130
    },
    {
      "epoch": 0.3732193732193732,
      "grad_norm": 0.372689425945282,
      "learning_rate": 6.296296296296297e-06,
      "loss": 1.5058,
      "step": 131
    },
    {
      "epoch": 0.37606837606837606,
      "grad_norm": 0.3884972333908081,
      "learning_rate": 6.267806267806268e-06,
      "loss": 1.5255,
      "step": 132
    },
    {
      "epoch": 0.3789173789173789,
      "grad_norm": 0.40464359521865845,
      "learning_rate": 6.23931623931624e-06,
      "loss": 1.5212,
      "step": 133
    },
    {
      "epoch": 0.3817663817663818,
      "grad_norm": 0.4075316786766052,
      "learning_rate": 6.210826210826212e-06,
      "loss": 1.4987,
      "step": 134
    },
    {
      "epoch": 0.38461538461538464,
      "grad_norm": 0.41846784949302673,
      "learning_rate": 6.1823361823361825e-06,
      "loss": 1.5409,
      "step": 135
    },
    {
      "epoch": 0.38746438746438744,
      "grad_norm": 0.4159785509109497,
      "learning_rate": 6.153846153846155e-06,
      "loss": 1.5393,
      "step": 136
    },
    {
      "epoch": 0.3903133903133903,
      "grad_norm": 0.3839842975139618,
      "learning_rate": 6.1253561253561255e-06,
      "loss": 1.5139,
      "step": 137
    },
    {
      "epoch": 0.39316239316239315,
      "grad_norm": 0.5279687643051147,
      "learning_rate": 6.096866096866098e-06,
      "loss": 1.5747,
      "step": 138
    },
    {
      "epoch": 0.396011396011396,
      "grad_norm": 0.40492990612983704,
      "learning_rate": 6.0683760683760684e-06,
      "loss": 1.453,
      "step": 139
    },
    {
      "epoch": 0.39886039886039887,
      "grad_norm": 0.41720351576805115,
      "learning_rate": 6.039886039886041e-06,
      "loss": 1.4864,
      "step": 140
    },
    {
      "epoch": 0.4017094017094017,
      "grad_norm": 0.3866989016532898,
      "learning_rate": 6.011396011396012e-06,
      "loss": 1.4723,
      "step": 141
    },
    {
      "epoch": 0.4045584045584046,
      "grad_norm": 0.38849347829818726,
      "learning_rate": 5.982905982905983e-06,
      "loss": 1.4842,
      "step": 142
    },
    {
      "epoch": 0.4074074074074074,
      "grad_norm": 0.5428235530853271,
      "learning_rate": 5.954415954415955e-06,
      "loss": 1.5338,
      "step": 143
    },
    {
      "epoch": 0.41025641025641024,
      "grad_norm": 0.3945627808570862,
      "learning_rate": 5.925925925925926e-06,
      "loss": 1.528,
      "step": 144
    },
    {
      "epoch": 0.4131054131054131,
      "grad_norm": 0.3996782898902893,
      "learning_rate": 5.897435897435898e-06,
      "loss": 1.5212,
      "step": 145
    },
    {
      "epoch": 0.41595441595441596,
      "grad_norm": 0.4091893136501312,
      "learning_rate": 5.868945868945869e-06,
      "loss": 1.5419,
      "step": 146
    },
    {
      "epoch": 0.4188034188034188,
      "grad_norm": 0.3839370906352997,
      "learning_rate": 5.840455840455841e-06,
      "loss": 1.4778,
      "step": 147
    },
    {
      "epoch": 0.42165242165242167,
      "grad_norm": 0.3939463496208191,
      "learning_rate": 5.8119658119658126e-06,
      "loss": 1.4912,
      "step": 148
    },
    {
      "epoch": 0.42450142450142453,
      "grad_norm": 0.5488878488540649,
      "learning_rate": 5.783475783475784e-06,
      "loss": 1.459,
      "step": 149
    },
    {
      "epoch": 0.42735042735042733,
      "grad_norm": 0.6062666773796082,
      "learning_rate": 5.7549857549857555e-06,
      "loss": 1.4166,
      "step": 150
    },
    {
      "epoch": 0.4301994301994302,
      "grad_norm": 0.5629584193229675,
      "learning_rate": 5.726495726495727e-06,
      "loss": 1.4818,
      "step": 151
    },
    {
      "epoch": 0.43304843304843305,
      "grad_norm": 0.41644972562789917,
      "learning_rate": 5.6980056980056985e-06,
      "loss": 1.4625,
      "step": 152
    },
    {
      "epoch": 0.4358974358974359,
      "grad_norm": 0.4007890820503235,
      "learning_rate": 5.669515669515669e-06,
      "loss": 1.4898,
      "step": 153
    },
    {
      "epoch": 0.43874643874643876,
      "grad_norm": 0.5906901359558105,
      "learning_rate": 5.641025641025641e-06,
      "loss": 1.5235,
      "step": 154
    },
    {
      "epoch": 0.4415954415954416,
      "grad_norm": 0.5607777237892151,
      "learning_rate": 5.612535612535614e-06,
      "loss": 1.5234,
      "step": 155
    },
    {
      "epoch": 0.4444444444444444,
      "grad_norm": 0.3959032893180847,
      "learning_rate": 5.584045584045584e-06,
      "loss": 1.4788,
      "step": 156
    },
    {
      "epoch": 0.4472934472934473,
      "grad_norm": 0.4064564108848572,
      "learning_rate": 5.555555555555557e-06,
      "loss": 1.503,
      "step": 157
    },
    {
      "epoch": 0.45014245014245013,
      "grad_norm": 0.39798179268836975,
      "learning_rate": 5.527065527065527e-06,
      "loss": 1.5001,
      "step": 158
    },
    {
      "epoch": 0.452991452991453,
      "grad_norm": 0.45741236209869385,
      "learning_rate": 5.498575498575499e-06,
      "loss": 1.5012,
      "step": 159
    },
    {
      "epoch": 0.45584045584045585,
      "grad_norm": 0.45142683386802673,
      "learning_rate": 5.470085470085471e-06,
      "loss": 1.5039,
      "step": 160
    },
    {
      "epoch": 0.4586894586894587,
      "grad_norm": 0.39934027194976807,
      "learning_rate": 5.441595441595442e-06,
      "loss": 1.4824,
      "step": 161
    },
    {
      "epoch": 0.46153846153846156,
      "grad_norm": 0.3966750502586365,
      "learning_rate": 5.413105413105414e-06,
      "loss": 1.4791,
      "step": 162
    },
    {
      "epoch": 0.46438746438746437,
      "grad_norm": 0.4393257200717926,
      "learning_rate": 5.384615384615385e-06,
      "loss": 1.449,
      "step": 163
    },
    {
      "epoch": 0.4672364672364672,
      "grad_norm": 0.42632415890693665,
      "learning_rate": 5.356125356125357e-06,
      "loss": 1.5248,
      "step": 164
    },
    {
      "epoch": 0.4700854700854701,
      "grad_norm": 0.41508087515830994,
      "learning_rate": 5.327635327635328e-06,
      "loss": 1.4873,
      "step": 165
    },
    {
      "epoch": 0.47293447293447294,
      "grad_norm": 0.4311036467552185,
      "learning_rate": 5.2991452991453e-06,
      "loss": 1.4981,
      "step": 166
    },
    {
      "epoch": 0.4757834757834758,
      "grad_norm": 0.39872288703918457,
      "learning_rate": 5.270655270655271e-06,
      "loss": 1.4905,
      "step": 167
    },
    {
      "epoch": 0.47863247863247865,
      "grad_norm": 0.412751168012619,
      "learning_rate": 5.242165242165243e-06,
      "loss": 1.4728,
      "step": 168
    },
    {
      "epoch": 0.48148148148148145,
      "grad_norm": 0.40860670804977417,
      "learning_rate": 5.213675213675214e-06,
      "loss": 1.4986,
      "step": 169
    },
    {
      "epoch": 0.4843304843304843,
      "grad_norm": 0.4355701208114624,
      "learning_rate": 5.185185185185185e-06,
      "loss": 1.5109,
      "step": 170
    },
    {
      "epoch": 0.48717948717948717,
      "grad_norm": 0.43395113945007324,
      "learning_rate": 5.156695156695157e-06,
      "loss": 1.4995,
      "step": 171
    },
    {
      "epoch": 0.49002849002849,
      "grad_norm": 0.43208786845207214,
      "learning_rate": 5.128205128205128e-06,
      "loss": 1.4399,
      "step": 172
    },
    {
      "epoch": 0.4928774928774929,
      "grad_norm": 0.40610820055007935,
      "learning_rate": 5.0997150997151e-06,
      "loss": 1.4794,
      "step": 173
    },
    {
      "epoch": 0.49572649572649574,
      "grad_norm": 0.40242278575897217,
      "learning_rate": 5.071225071225072e-06,
      "loss": 1.4634,
      "step": 174
    },
    {
      "epoch": 0.4985754985754986,
      "grad_norm": 0.39585167169570923,
      "learning_rate": 5.042735042735043e-06,
      "loss": 1.4701,
      "step": 175
    },
    {
      "epoch": 0.5014245014245015,
      "grad_norm": 0.43933385610580444,
      "learning_rate": 5.014245014245015e-06,
      "loss": 1.4759,
      "step": 176
    },
    {
      "epoch": 0.5042735042735043,
      "grad_norm": 0.5048877000808716,
      "learning_rate": 4.985754985754986e-06,
      "loss": 1.4405,
      "step": 177
    },
    {
      "epoch": 0.5071225071225072,
      "grad_norm": 0.45279544591903687,
      "learning_rate": 4.957264957264958e-06,
      "loss": 1.5182,
      "step": 178
    },
    {
      "epoch": 0.50997150997151,
      "grad_norm": 0.40896686911582947,
      "learning_rate": 4.928774928774929e-06,
      "loss": 1.4857,
      "step": 179
    },
    {
      "epoch": 0.5128205128205128,
      "grad_norm": 0.6420154571533203,
      "learning_rate": 4.9002849002849006e-06,
      "loss": 1.4331,
      "step": 180
    },
    {
      "epoch": 0.5156695156695157,
      "grad_norm": 0.45687025785446167,
      "learning_rate": 4.871794871794872e-06,
      "loss": 1.4716,
      "step": 181
    },
    {
      "epoch": 0.5185185185185185,
      "grad_norm": 0.4174126088619232,
      "learning_rate": 4.8433048433048435e-06,
      "loss": 1.4636,
      "step": 182
    },
    {
      "epoch": 0.5213675213675214,
      "grad_norm": 0.3912286162376404,
      "learning_rate": 4.814814814814815e-06,
      "loss": 1.4534,
      "step": 183
    },
    {
      "epoch": 0.5242165242165242,
      "grad_norm": 0.44232121109962463,
      "learning_rate": 4.786324786324787e-06,
      "loss": 1.4286,
      "step": 184
    },
    {
      "epoch": 0.5270655270655271,
      "grad_norm": 0.4259029030799866,
      "learning_rate": 4.757834757834758e-06,
      "loss": 1.5174,
      "step": 185
    },
    {
      "epoch": 0.5299145299145299,
      "grad_norm": 0.39745402336120605,
      "learning_rate": 4.729344729344729e-06,
      "loss": 1.4393,
      "step": 186
    },
    {
      "epoch": 0.5327635327635327,
      "grad_norm": 0.7201390266418457,
      "learning_rate": 4.700854700854701e-06,
      "loss": 1.5721,
      "step": 187
    },
    {
      "epoch": 0.5356125356125356,
      "grad_norm": 0.42101916670799255,
      "learning_rate": 4.672364672364672e-06,
      "loss": 1.4847,
      "step": 188
    },
    {
      "epoch": 0.5384615384615384,
      "grad_norm": 0.4132574498653412,
      "learning_rate": 4.643874643874644e-06,
      "loss": 1.4632,
      "step": 189
    },
    {
      "epoch": 0.5413105413105413,
      "grad_norm": 0.44261249899864197,
      "learning_rate": 4.615384615384616e-06,
      "loss": 1.4767,
      "step": 190
    },
    {
      "epoch": 0.5441595441595442,
      "grad_norm": 0.4636523723602295,
      "learning_rate": 4.586894586894588e-06,
      "loss": 1.4868,
      "step": 191
    },
    {
      "epoch": 0.5470085470085471,
      "grad_norm": 0.4402620792388916,
      "learning_rate": 4.558404558404559e-06,
      "loss": 1.5096,
      "step": 192
    },
    {
      "epoch": 0.5498575498575499,
      "grad_norm": 0.46384042501449585,
      "learning_rate": 4.5299145299145306e-06,
      "loss": 1.5022,
      "step": 193
    },
    {
      "epoch": 0.5527065527065527,
      "grad_norm": 0.4248226583003998,
      "learning_rate": 4.501424501424502e-06,
      "loss": 1.4968,
      "step": 194
    },
    {
      "epoch": 0.5555555555555556,
      "grad_norm": 0.41844654083251953,
      "learning_rate": 4.4729344729344735e-06,
      "loss": 1.4441,
      "step": 195
    },
    {
      "epoch": 0.5584045584045584,
      "grad_norm": 0.4129433035850525,
      "learning_rate": 4.444444444444444e-06,
      "loss": 1.4598,
      "step": 196
    },
    {
      "epoch": 0.5612535612535613,
      "grad_norm": 0.4882029891014099,
      "learning_rate": 4.4159544159544165e-06,
      "loss": 1.5211,
      "step": 197
    },
    {
      "epoch": 0.5641025641025641,
      "grad_norm": 0.4571973979473114,
      "learning_rate": 4.387464387464388e-06,
      "loss": 1.4964,
      "step": 198
    },
    {
      "epoch": 0.5669515669515669,
      "grad_norm": 0.4153326451778412,
      "learning_rate": 4.358974358974359e-06,
      "loss": 1.4912,
      "step": 199
    },
    {
      "epoch": 0.5698005698005698,
      "grad_norm": 0.41810521483421326,
      "learning_rate": 4.330484330484331e-06,
      "loss": 1.4881,
      "step": 200
    },
    {
      "epoch": 0.5726495726495726,
      "grad_norm": 0.43121734261512756,
      "learning_rate": 4.301994301994302e-06,
      "loss": 1.4489,
      "step": 201
    },
    {
      "epoch": 0.5754985754985755,
      "grad_norm": 0.39392393827438354,
      "learning_rate": 4.273504273504274e-06,
      "loss": 1.4354,
      "step": 202
    },
    {
      "epoch": 0.5783475783475783,
      "grad_norm": 0.4206382632255554,
      "learning_rate": 4.245014245014245e-06,
      "loss": 1.4294,
      "step": 203
    },
    {
      "epoch": 0.5811965811965812,
      "grad_norm": 0.7128792405128479,
      "learning_rate": 4.216524216524217e-06,
      "loss": 1.4796,
      "step": 204
    },
    {
      "epoch": 0.584045584045584,
      "grad_norm": 0.42449796199798584,
      "learning_rate": 4.188034188034188e-06,
      "loss": 1.44,
      "step": 205
    },
    {
      "epoch": 0.5868945868945868,
      "grad_norm": 0.40819981694221497,
      "learning_rate": 4.15954415954416e-06,
      "loss": 1.4674,
      "step": 206
    },
    {
      "epoch": 0.5897435897435898,
      "grad_norm": 0.4191708564758301,
      "learning_rate": 4.131054131054131e-06,
      "loss": 1.4231,
      "step": 207
    },
    {
      "epoch": 0.5925925925925926,
      "grad_norm": 0.4241287410259247,
      "learning_rate": 4.102564102564103e-06,
      "loss": 1.4841,
      "step": 208
    },
    {
      "epoch": 0.5954415954415955,
      "grad_norm": 0.4283653795719147,
      "learning_rate": 4.074074074074074e-06,
      "loss": 1.4251,
      "step": 209
    },
    {
      "epoch": 0.5982905982905983,
      "grad_norm": 0.41446876525878906,
      "learning_rate": 4.0455840455840465e-06,
      "loss": 1.4496,
      "step": 210
    },
    {
      "epoch": 0.6011396011396012,
      "grad_norm": 0.4163020849227905,
      "learning_rate": 4.017094017094018e-06,
      "loss": 1.4273,
      "step": 211
    },
    {
      "epoch": 0.603988603988604,
      "grad_norm": 0.42851346731185913,
      "learning_rate": 3.9886039886039894e-06,
      "loss": 1.4727,
      "step": 212
    },
    {
      "epoch": 0.6068376068376068,
      "grad_norm": 0.4239060878753662,
      "learning_rate": 3.96011396011396e-06,
      "loss": 1.4318,
      "step": 213
    },
    {
      "epoch": 0.6096866096866097,
      "grad_norm": 0.40873628854751587,
      "learning_rate": 3.9316239316239315e-06,
      "loss": 1.4548,
      "step": 214
    },
    {
      "epoch": 0.6125356125356125,
      "grad_norm": 0.45280134677886963,
      "learning_rate": 3.903133903133903e-06,
      "loss": 1.4932,
      "step": 215
    },
    {
      "epoch": 0.6153846153846154,
      "grad_norm": 0.6247657537460327,
      "learning_rate": 3.8746438746438745e-06,
      "loss": 1.4499,
      "step": 216
    },
    {
      "epoch": 0.6182336182336182,
      "grad_norm": 0.4122682511806488,
      "learning_rate": 3.846153846153847e-06,
      "loss": 1.4218,
      "step": 217
    },
    {
      "epoch": 0.6210826210826211,
      "grad_norm": 0.40727391839027405,
      "learning_rate": 3.817663817663818e-06,
      "loss": 1.4726,
      "step": 218
    },
    {
      "epoch": 0.6239316239316239,
      "grad_norm": 0.4725242555141449,
      "learning_rate": 3.7891737891737893e-06,
      "loss": 1.4214,
      "step": 219
    },
    {
      "epoch": 0.6267806267806267,
      "grad_norm": 0.45712363719940186,
      "learning_rate": 3.760683760683761e-06,
      "loss": 1.4518,
      "step": 220
    },
    {
      "epoch": 0.6296296296296297,
      "grad_norm": 0.40573611855506897,
      "learning_rate": 3.7321937321937323e-06,
      "loss": 1.459,
      "step": 221
    },
    {
      "epoch": 0.6324786324786325,
      "grad_norm": 0.4086320400238037,
      "learning_rate": 3.7037037037037037e-06,
      "loss": 1.4395,
      "step": 222
    },
    {
      "epoch": 0.6353276353276354,
      "grad_norm": 0.4158555567264557,
      "learning_rate": 3.6752136752136756e-06,
      "loss": 1.4436,
      "step": 223
    },
    {
      "epoch": 0.6381766381766382,
      "grad_norm": 0.5216575264930725,
      "learning_rate": 3.646723646723647e-06,
      "loss": 1.4659,
      "step": 224
    },
    {
      "epoch": 0.6410256410256411,
      "grad_norm": 0.394228994846344,
      "learning_rate": 3.6182336182336186e-06,
      "loss": 1.4637,
      "step": 225
    },
    {
      "epoch": 0.6438746438746439,
      "grad_norm": 0.41643351316452026,
      "learning_rate": 3.58974358974359e-06,
      "loss": 1.4298,
      "step": 226
    },
    {
      "epoch": 0.6467236467236467,
      "grad_norm": 0.407087117433548,
      "learning_rate": 3.5612535612535615e-06,
      "loss": 1.4426,
      "step": 227
    },
    {
      "epoch": 0.6495726495726496,
      "grad_norm": 0.47986599802970886,
      "learning_rate": 3.532763532763533e-06,
      "loss": 1.5079,
      "step": 228
    },
    {
      "epoch": 0.6524216524216524,
      "grad_norm": 0.42481309175491333,
      "learning_rate": 3.5042735042735045e-06,
      "loss": 1.4422,
      "step": 229
    },
    {
      "epoch": 0.6552706552706553,
      "grad_norm": 0.43366938829421997,
      "learning_rate": 3.4757834757834764e-06,
      "loss": 1.467,
      "step": 230
    },
    {
      "epoch": 0.6581196581196581,
      "grad_norm": 0.5313072204589844,
      "learning_rate": 3.447293447293448e-06,
      "loss": 1.4382,
      "step": 231
    },
    {
      "epoch": 0.6609686609686609,
      "grad_norm": 0.40050390362739563,
      "learning_rate": 3.4188034188034193e-06,
      "loss": 1.4024,
      "step": 232
    },
    {
      "epoch": 0.6638176638176638,
      "grad_norm": 0.42196667194366455,
      "learning_rate": 3.3903133903133904e-06,
      "loss": 1.4825,
      "step": 233
    },
    {
      "epoch": 0.6666666666666666,
      "grad_norm": 0.4109940826892853,
      "learning_rate": 3.361823361823362e-06,
      "loss": 1.4036,
      "step": 234
    },
    {
      "epoch": 0.6695156695156695,
      "grad_norm": 0.41641300916671753,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 1.4409,
      "step": 235
    },
    {
      "epoch": 0.6723646723646723,
      "grad_norm": 0.4459202289581299,
      "learning_rate": 3.304843304843305e-06,
      "loss": 1.4422,
      "step": 236
    },
    {
      "epoch": 0.6752136752136753,
      "grad_norm": 0.40903767943382263,
      "learning_rate": 3.2763532763532767e-06,
      "loss": 1.4375,
      "step": 237
    },
    {
      "epoch": 0.6780626780626781,
      "grad_norm": 0.40536248683929443,
      "learning_rate": 3.247863247863248e-06,
      "loss": 1.4357,
      "step": 238
    },
    {
      "epoch": 0.6809116809116809,
      "grad_norm": 0.43088406324386597,
      "learning_rate": 3.2193732193732196e-06,
      "loss": 1.4428,
      "step": 239
    },
    {
      "epoch": 0.6837606837606838,
      "grad_norm": 0.43017005920410156,
      "learning_rate": 3.190883190883191e-06,
      "loss": 1.4213,
      "step": 240
    },
    {
      "epoch": 0.6866096866096866,
      "grad_norm": 0.43592897057533264,
      "learning_rate": 3.1623931623931626e-06,
      "loss": 1.5107,
      "step": 241
    },
    {
      "epoch": 0.6894586894586895,
      "grad_norm": 0.6451869606971741,
      "learning_rate": 3.133903133903134e-06,
      "loss": 1.4993,
      "step": 242
    },
    {
      "epoch": 0.6923076923076923,
      "grad_norm": 0.45624542236328125,
      "learning_rate": 3.105413105413106e-06,
      "loss": 1.4297,
      "step": 243
    },
    {
      "epoch": 0.6951566951566952,
      "grad_norm": 0.4131554067134857,
      "learning_rate": 3.0769230769230774e-06,
      "loss": 1.4272,
      "step": 244
    },
    {
      "epoch": 0.698005698005698,
      "grad_norm": 0.49703848361968994,
      "learning_rate": 3.048433048433049e-06,
      "loss": 1.4175,
      "step": 245
    },
    {
      "epoch": 0.7008547008547008,
      "grad_norm": 0.4367448091506958,
      "learning_rate": 3.0199430199430204e-06,
      "loss": 1.4585,
      "step": 246
    },
    {
      "epoch": 0.7037037037037037,
      "grad_norm": 0.44849011301994324,
      "learning_rate": 2.9914529914529914e-06,
      "loss": 1.4596,
      "step": 247
    },
    {
      "epoch": 0.7065527065527065,
      "grad_norm": 0.42930400371551514,
      "learning_rate": 2.962962962962963e-06,
      "loss": 1.4335,
      "step": 248
    },
    {
      "epoch": 0.7094017094017094,
      "grad_norm": 0.4332965612411499,
      "learning_rate": 2.9344729344729344e-06,
      "loss": 1.4509,
      "step": 249
    },
    {
      "epoch": 0.7122507122507122,
      "grad_norm": 0.44173556566238403,
      "learning_rate": 2.9059829059829063e-06,
      "loss": 1.4596,
      "step": 250
    },
    {
      "epoch": 0.7150997150997151,
      "grad_norm": 0.40930160880088806,
      "learning_rate": 2.8774928774928778e-06,
      "loss": 1.4327,
      "step": 251
    },
    {
      "epoch": 0.717948717948718,
      "grad_norm": 0.4137099087238312,
      "learning_rate": 2.8490028490028492e-06,
      "loss": 1.4119,
      "step": 252
    },
    {
      "epoch": 0.7207977207977208,
      "grad_norm": 0.43292713165283203,
      "learning_rate": 2.8205128205128207e-06,
      "loss": 1.4352,
      "step": 253
    },
    {
      "epoch": 0.7236467236467237,
      "grad_norm": 0.6853729486465454,
      "learning_rate": 2.792022792022792e-06,
      "loss": 1.4859,
      "step": 254
    },
    {
      "epoch": 0.7264957264957265,
      "grad_norm": 0.4223368465900421,
      "learning_rate": 2.7635327635327636e-06,
      "loss": 1.4189,
      "step": 255
    },
    {
      "epoch": 0.7293447293447294,
      "grad_norm": 0.4098432958126068,
      "learning_rate": 2.7350427350427355e-06,
      "loss": 1.4474,
      "step": 256
    },
    {
      "epoch": 0.7321937321937322,
      "grad_norm": 0.42546141147613525,
      "learning_rate": 2.706552706552707e-06,
      "loss": 1.4447,
      "step": 257
    },
    {
      "epoch": 0.7350427350427351,
      "grad_norm": 0.434319406747818,
      "learning_rate": 2.6780626780626785e-06,
      "loss": 1.4559,
      "step": 258
    },
    {
      "epoch": 0.7378917378917379,
      "grad_norm": 0.5959000587463379,
      "learning_rate": 2.64957264957265e-06,
      "loss": 1.3711,
      "step": 259
    },
    {
      "epoch": 0.7407407407407407,
      "grad_norm": 0.6558396220207214,
      "learning_rate": 2.6210826210826214e-06,
      "loss": 1.3735,
      "step": 260
    },
    {
      "epoch": 0.7435897435897436,
      "grad_norm": 0.4049711525440216,
      "learning_rate": 2.5925925925925925e-06,
      "loss": 1.4327,
      "step": 261
    },
    {
      "epoch": 0.7464387464387464,
      "grad_norm": 0.4057099223136902,
      "learning_rate": 2.564102564102564e-06,
      "loss": 1.4173,
      "step": 262
    },
    {
      "epoch": 0.7492877492877493,
      "grad_norm": 0.44100022315979004,
      "learning_rate": 2.535612535612536e-06,
      "loss": 1.4568,
      "step": 263
    },
    {
      "epoch": 0.7521367521367521,
      "grad_norm": 0.4259463846683502,
      "learning_rate": 2.5071225071225073e-06,
      "loss": 1.4473,
      "step": 264
    },
    {
      "epoch": 0.7549857549857549,
      "grad_norm": 0.47139763832092285,
      "learning_rate": 2.478632478632479e-06,
      "loss": 1.4467,
      "step": 265
    },
    {
      "epoch": 0.7578347578347578,
      "grad_norm": 0.4066116511821747,
      "learning_rate": 2.4501424501424503e-06,
      "loss": 1.4148,
      "step": 266
    },
    {
      "epoch": 0.7606837606837606,
      "grad_norm": 0.4442392587661743,
      "learning_rate": 2.4216524216524218e-06,
      "loss": 1.4166,
      "step": 267
    },
    {
      "epoch": 0.7635327635327636,
      "grad_norm": 0.4146524667739868,
      "learning_rate": 2.3931623931623937e-06,
      "loss": 1.4214,
      "step": 268
    },
    {
      "epoch": 0.7663817663817664,
      "grad_norm": 0.4352812170982361,
      "learning_rate": 2.3646723646723647e-06,
      "loss": 1.4268,
      "step": 269
    },
    {
      "epoch": 0.7692307692307693,
      "grad_norm": 0.4416466951370239,
      "learning_rate": 2.336182336182336e-06,
      "loss": 1.3947,
      "step": 270
    },
    {
      "epoch": 0.7720797720797721,
      "grad_norm": 0.4440385699272156,
      "learning_rate": 2.307692307692308e-06,
      "loss": 1.4114,
      "step": 271
    },
    {
      "epoch": 0.7749287749287749,
      "grad_norm": 0.42091333866119385,
      "learning_rate": 2.2792022792022796e-06,
      "loss": 1.4343,
      "step": 272
    },
    {
      "epoch": 0.7777777777777778,
      "grad_norm": 0.39965999126434326,
      "learning_rate": 2.250712250712251e-06,
      "loss": 1.4401,
      "step": 273
    },
    {
      "epoch": 0.7806267806267806,
      "grad_norm": 0.4088633060455322,
      "learning_rate": 2.222222222222222e-06,
      "loss": 1.3808,
      "step": 274
    },
    {
      "epoch": 0.7834757834757835,
      "grad_norm": 0.42541617155075073,
      "learning_rate": 2.193732193732194e-06,
      "loss": 1.45,
      "step": 275
    },
    {
      "epoch": 0.7863247863247863,
      "grad_norm": 0.42558950185775757,
      "learning_rate": 2.1652421652421654e-06,
      "loss": 1.4317,
      "step": 276
    },
    {
      "epoch": 0.7891737891737892,
      "grad_norm": 0.4297507703304291,
      "learning_rate": 2.136752136752137e-06,
      "loss": 1.4493,
      "step": 277
    },
    {
      "epoch": 0.792022792022792,
      "grad_norm": 0.42826247215270996,
      "learning_rate": 2.1082621082621084e-06,
      "loss": 1.4665,
      "step": 278
    },
    {
      "epoch": 0.7948717948717948,
      "grad_norm": 0.4104038178920746,
      "learning_rate": 2.07977207977208e-06,
      "loss": 1.3966,
      "step": 279
    },
    {
      "epoch": 0.7977207977207977,
      "grad_norm": 0.5832846164703369,
      "learning_rate": 2.0512820512820513e-06,
      "loss": 1.409,
      "step": 280
    },
    {
      "epoch": 0.8005698005698005,
      "grad_norm": 0.4132280647754669,
      "learning_rate": 2.0227920227920232e-06,
      "loss": 1.421,
      "step": 281
    },
    {
      "epoch": 0.8034188034188035,
      "grad_norm": 0.5175873637199402,
      "learning_rate": 1.9943019943019947e-06,
      "loss": 1.4251,
      "step": 282
    },
    {
      "epoch": 0.8062678062678063,
      "grad_norm": 0.3983429968357086,
      "learning_rate": 1.9658119658119658e-06,
      "loss": 1.4305,
      "step": 283
    },
    {
      "epoch": 0.8091168091168092,
      "grad_norm": 0.4195236563682556,
      "learning_rate": 1.9373219373219372e-06,
      "loss": 1.3955,
      "step": 284
    },
    {
      "epoch": 0.811965811965812,
      "grad_norm": 0.44437727332115173,
      "learning_rate": 1.908831908831909e-06,
      "loss": 1.3945,
      "step": 285
    },
    {
      "epoch": 0.8148148148148148,
      "grad_norm": 0.4069578945636749,
      "learning_rate": 1.8803418803418804e-06,
      "loss": 1.3872,
      "step": 286
    },
    {
      "epoch": 0.8176638176638177,
      "grad_norm": 0.4366849660873413,
      "learning_rate": 1.8518518518518519e-06,
      "loss": 1.4303,
      "step": 287
    },
    {
      "epoch": 0.8205128205128205,
      "grad_norm": 0.42686140537261963,
      "learning_rate": 1.8233618233618236e-06,
      "loss": 1.4297,
      "step": 288
    },
    {
      "epoch": 0.8233618233618234,
      "grad_norm": 0.4372996687889099,
      "learning_rate": 1.794871794871795e-06,
      "loss": 1.4205,
      "step": 289
    },
    {
      "epoch": 0.8262108262108262,
      "grad_norm": 0.5185275077819824,
      "learning_rate": 1.7663817663817665e-06,
      "loss": 1.4072,
      "step": 290
    },
    {
      "epoch": 0.8290598290598291,
      "grad_norm": 0.4375689625740051,
      "learning_rate": 1.7378917378917382e-06,
      "loss": 1.4093,
      "step": 291
    },
    {
      "epoch": 0.8319088319088319,
      "grad_norm": 0.6223400235176086,
      "learning_rate": 1.7094017094017097e-06,
      "loss": 1.4038,
      "step": 292
    },
    {
      "epoch": 0.8347578347578347,
      "grad_norm": 0.49658337235450745,
      "learning_rate": 1.680911680911681e-06,
      "loss": 1.4587,
      "step": 293
    },
    {
      "epoch": 0.8376068376068376,
      "grad_norm": 0.48749840259552,
      "learning_rate": 1.6524216524216524e-06,
      "loss": 1.4573,
      "step": 294
    },
    {
      "epoch": 0.8404558404558404,
      "grad_norm": 0.4375877380371094,
      "learning_rate": 1.623931623931624e-06,
      "loss": 1.4126,
      "step": 295
    },
    {
      "epoch": 0.8433048433048433,
      "grad_norm": 0.5864587426185608,
      "learning_rate": 1.5954415954415956e-06,
      "loss": 1.3915,
      "step": 296
    },
    {
      "epoch": 0.8461538461538461,
      "grad_norm": 0.4243745803833008,
      "learning_rate": 1.566951566951567e-06,
      "loss": 1.4475,
      "step": 297
    },
    {
      "epoch": 0.8490028490028491,
      "grad_norm": 0.5398270487785339,
      "learning_rate": 1.5384615384615387e-06,
      "loss": 1.3658,
      "step": 298
    },
    {
      "epoch": 0.8518518518518519,
      "grad_norm": 0.4248296916484833,
      "learning_rate": 1.5099715099715102e-06,
      "loss": 1.3898,
      "step": 299
    },
    {
      "epoch": 0.8547008547008547,
      "grad_norm": 0.4054194986820221,
      "learning_rate": 1.4814814814814815e-06,
      "loss": 1.3806,
      "step": 300
    },
    {
      "epoch": 0.8575498575498576,
      "grad_norm": 0.4230331778526306,
      "learning_rate": 1.4529914529914531e-06,
      "loss": 1.431,
      "step": 301
    },
    {
      "epoch": 0.8603988603988604,
      "grad_norm": 0.42785853147506714,
      "learning_rate": 1.4245014245014246e-06,
      "loss": 1.3905,
      "step": 302
    },
    {
      "epoch": 0.8632478632478633,
      "grad_norm": 0.6043952703475952,
      "learning_rate": 1.396011396011396e-06,
      "loss": 1.444,
      "step": 303
    },
    {
      "epoch": 0.8660968660968661,
      "grad_norm": 0.41546547412872314,
      "learning_rate": 1.3675213675213678e-06,
      "loss": 1.3876,
      "step": 304
    },
    {
      "epoch": 0.8689458689458689,
      "grad_norm": 0.5535686612129211,
      "learning_rate": 1.3390313390313392e-06,
      "loss": 1.3663,
      "step": 305
    },
    {
      "epoch": 0.8717948717948718,
      "grad_norm": 0.43172240257263184,
      "learning_rate": 1.3105413105413107e-06,
      "loss": 1.4281,
      "step": 306
    },
    {
      "epoch": 0.8746438746438746,
      "grad_norm": 0.4234292209148407,
      "learning_rate": 1.282051282051282e-06,
      "loss": 1.4105,
      "step": 307
    },
    {
      "epoch": 0.8774928774928775,
      "grad_norm": 0.4184323847293854,
      "learning_rate": 1.2535612535612537e-06,
      "loss": 1.3755,
      "step": 308
    },
    {
      "epoch": 0.8803418803418803,
      "grad_norm": 0.6069676876068115,
      "learning_rate": 1.2250712250712251e-06,
      "loss": 1.3666,
      "step": 309
    },
    {
      "epoch": 0.8831908831908832,
      "grad_norm": 0.4531959891319275,
      "learning_rate": 1.1965811965811968e-06,
      "loss": 1.4109,
      "step": 310
    },
    {
      "epoch": 0.886039886039886,
      "grad_norm": 0.49059048295021057,
      "learning_rate": 1.168091168091168e-06,
      "loss": 1.4259,
      "step": 311
    },
    {
      "epoch": 0.8888888888888888,
      "grad_norm": 0.4053284823894501,
      "learning_rate": 1.1396011396011398e-06,
      "loss": 1.4173,
      "step": 312
    },
    {
      "epoch": 0.8917378917378918,
      "grad_norm": 0.4258776307106018,
      "learning_rate": 1.111111111111111e-06,
      "loss": 1.4079,
      "step": 313
    },
    {
      "epoch": 0.8945868945868946,
      "grad_norm": 0.4315298795700073,
      "learning_rate": 1.0826210826210827e-06,
      "loss": 1.3791,
      "step": 314
    },
    {
      "epoch": 0.8974358974358975,
      "grad_norm": 0.48497509956359863,
      "learning_rate": 1.0541310541310542e-06,
      "loss": 1.4389,
      "step": 315
    },
    {
      "epoch": 0.9002849002849003,
      "grad_norm": 0.4596964716911316,
      "learning_rate": 1.0256410256410257e-06,
      "loss": 1.4253,
      "step": 316
    },
    {
      "epoch": 0.9031339031339032,
      "grad_norm": 0.43682560324668884,
      "learning_rate": 9.971509971509974e-07,
      "loss": 1.4358,
      "step": 317
    },
    {
      "epoch": 0.905982905982906,
      "grad_norm": 0.5284684896469116,
      "learning_rate": 9.686609686609686e-07,
      "loss": 1.3974,
      "step": 318
    },
    {
      "epoch": 0.9088319088319088,
      "grad_norm": 0.444614440202713,
      "learning_rate": 9.401709401709402e-07,
      "loss": 1.4258,
      "step": 319
    },
    {
      "epoch": 0.9116809116809117,
      "grad_norm": 0.41446149349212646,
      "learning_rate": 9.116809116809118e-07,
      "loss": 1.4093,
      "step": 320
    },
    {
      "epoch": 0.9145299145299145,
      "grad_norm": 0.505181074142456,
      "learning_rate": 8.831908831908833e-07,
      "loss": 1.4355,
      "step": 321
    },
    {
      "epoch": 0.9173789173789174,
      "grad_norm": 0.41858991980552673,
      "learning_rate": 8.547008547008548e-07,
      "loss": 1.4259,
      "step": 322
    },
    {
      "epoch": 0.9202279202279202,
      "grad_norm": 0.6958276033401489,
      "learning_rate": 8.262108262108262e-07,
      "loss": 1.4456,
      "step": 323
    },
    {
      "epoch": 0.9230769230769231,
      "grad_norm": 0.4824206829071045,
      "learning_rate": 7.977207977207978e-07,
      "loss": 1.4341,
      "step": 324
    },
    {
      "epoch": 0.9259259259259259,
      "grad_norm": 0.4208286702632904,
      "learning_rate": 7.692307692307694e-07,
      "loss": 1.4401,
      "step": 325
    },
    {
      "epoch": 0.9287749287749287,
      "grad_norm": 0.43090713024139404,
      "learning_rate": 7.407407407407407e-07,
      "loss": 1.4295,
      "step": 326
    },
    {
      "epoch": 0.9316239316239316,
      "grad_norm": 0.4124811589717865,
      "learning_rate": 7.122507122507123e-07,
      "loss": 1.4234,
      "step": 327
    },
    {
      "epoch": 0.9344729344729344,
      "grad_norm": 0.4865758419036865,
      "learning_rate": 6.837606837606839e-07,
      "loss": 1.4787,
      "step": 328
    },
    {
      "epoch": 0.9373219373219374,
      "grad_norm": 0.4624764323234558,
      "learning_rate": 6.552706552706554e-07,
      "loss": 1.3913,
      "step": 329
    },
    {
      "epoch": 0.9401709401709402,
      "grad_norm": 0.4168078899383545,
      "learning_rate": 6.267806267806268e-07,
      "loss": 1.3954,
      "step": 330
    },
    {
      "epoch": 0.9430199430199431,
      "grad_norm": 0.43121403455734253,
      "learning_rate": 5.982905982905984e-07,
      "loss": 1.4046,
      "step": 331
    },
    {
      "epoch": 0.9458689458689459,
      "grad_norm": 0.43017080426216125,
      "learning_rate": 5.698005698005699e-07,
      "loss": 1.4471,
      "step": 332
    },
    {
      "epoch": 0.9487179487179487,
      "grad_norm": 0.41371017694473267,
      "learning_rate": 5.413105413105414e-07,
      "loss": 1.3891,
      "step": 333
    },
    {
      "epoch": 0.9515669515669516,
      "grad_norm": 0.42624595761299133,
      "learning_rate": 5.128205128205128e-07,
      "loss": 1.4431,
      "step": 334
    },
    {
      "epoch": 0.9544159544159544,
      "grad_norm": 0.4311563968658447,
      "learning_rate": 4.843304843304843e-07,
      "loss": 1.3985,
      "step": 335
    },
    {
      "epoch": 0.9572649572649573,
      "grad_norm": 0.42693498730659485,
      "learning_rate": 4.558404558404559e-07,
      "loss": 1.3818,
      "step": 336
    },
    {
      "epoch": 0.9601139601139601,
      "grad_norm": 0.7170986533164978,
      "learning_rate": 4.273504273504274e-07,
      "loss": 1.4704,
      "step": 337
    },
    {
      "epoch": 0.9629629629629629,
      "grad_norm": 0.42342740297317505,
      "learning_rate": 3.988603988603989e-07,
      "loss": 1.4172,
      "step": 338
    },
    {
      "epoch": 0.9658119658119658,
      "grad_norm": 0.5637214183807373,
      "learning_rate": 3.7037037037037036e-07,
      "loss": 1.3729,
      "step": 339
    },
    {
      "epoch": 0.9686609686609686,
      "grad_norm": 0.42340558767318726,
      "learning_rate": 3.4188034188034194e-07,
      "loss": 1.3958,
      "step": 340
    },
    {
      "epoch": 0.9715099715099715,
      "grad_norm": 0.4184475541114807,
      "learning_rate": 3.133903133903134e-07,
      "loss": 1.4015,
      "step": 341
    },
    {
      "epoch": 0.9743589743589743,
      "grad_norm": 0.42320722341537476,
      "learning_rate": 2.8490028490028494e-07,
      "loss": 1.396,
      "step": 342
    },
    {
      "epoch": 0.9772079772079773,
      "grad_norm": 0.4045957624912262,
      "learning_rate": 2.564102564102564e-07,
      "loss": 1.4237,
      "step": 343
    },
    {
      "epoch": 0.98005698005698,
      "grad_norm": 0.4371383488178253,
      "learning_rate": 2.2792022792022794e-07,
      "loss": 1.4127,
      "step": 344
    },
    {
      "epoch": 0.9829059829059829,
      "grad_norm": 0.5121440291404724,
      "learning_rate": 1.9943019943019944e-07,
      "loss": 1.4109,
      "step": 345
    },
    {
      "epoch": 0.9857549857549858,
      "grad_norm": 0.42042669653892517,
      "learning_rate": 1.7094017094017097e-07,
      "loss": 1.352,
      "step": 346
    },
    {
      "epoch": 0.9886039886039886,
      "grad_norm": 0.7115257382392883,
      "learning_rate": 1.4245014245014247e-07,
      "loss": 1.404,
      "step": 347
    },
    {
      "epoch": 0.9914529914529915,
      "grad_norm": 0.5735996961593628,
      "learning_rate": 1.1396011396011397e-07,
      "loss": 1.4461,
      "step": 348
    },
    {
      "epoch": 0.9943019943019943,
      "grad_norm": 0.4333067834377289,
      "learning_rate": 8.547008547008549e-08,
      "loss": 1.403,
      "step": 349
    },
    {
      "epoch": 0.9971509971509972,
      "grad_norm": 0.4068621098995209,
      "learning_rate": 5.6980056980056986e-08,
      "loss": 1.4357,
      "step": 350
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.6922910809516907,
      "learning_rate": 2.8490028490028493e-08,
      "loss": 1.4253,
      "step": 351
    }
  ],
  "logging_steps": 1.0,
  "max_steps": 351,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 0,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.6380890003275776e+16,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}