|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 0, |
|
"global_step": 404, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0024752475247524753, |
|
"grad_norm": 0.866270124912262, |
|
"learning_rate": 1e-05, |
|
"loss": 2.233, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0049504950495049506, |
|
"grad_norm": 0.9500324726104736, |
|
"learning_rate": 9.975247524752477e-06, |
|
"loss": 2.3925, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.007425742574257425, |
|
"grad_norm": 0.9805428385734558, |
|
"learning_rate": 9.950495049504951e-06, |
|
"loss": 2.4263, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.009900990099009901, |
|
"grad_norm": 0.9103994965553284, |
|
"learning_rate": 9.925742574257427e-06, |
|
"loss": 2.3704, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.012376237623762377, |
|
"grad_norm": 0.8131201863288879, |
|
"learning_rate": 9.900990099009901e-06, |
|
"loss": 2.2257, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.01485148514851485, |
|
"grad_norm": 0.8377613425254822, |
|
"learning_rate": 9.876237623762377e-06, |
|
"loss": 2.272, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.017326732673267328, |
|
"grad_norm": 0.8485517501831055, |
|
"learning_rate": 9.851485148514852e-06, |
|
"loss": 2.3425, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.019801980198019802, |
|
"grad_norm": 0.7759255766868591, |
|
"learning_rate": 9.826732673267328e-06, |
|
"loss": 2.26, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.022277227722772276, |
|
"grad_norm": 0.6999015212059021, |
|
"learning_rate": 9.801980198019802e-06, |
|
"loss": 2.1296, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.024752475247524754, |
|
"grad_norm": 0.7118146419525146, |
|
"learning_rate": 9.777227722772278e-06, |
|
"loss": 2.2419, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.027227722772277228, |
|
"grad_norm": 0.6341562867164612, |
|
"learning_rate": 9.752475247524754e-06, |
|
"loss": 2.1281, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.0297029702970297, |
|
"grad_norm": 0.6499541997909546, |
|
"learning_rate": 9.727722772277228e-06, |
|
"loss": 2.1709, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.03217821782178218, |
|
"grad_norm": 0.6424671411514282, |
|
"learning_rate": 9.702970297029704e-06, |
|
"loss": 2.1623, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.034653465346534656, |
|
"grad_norm": 0.6142817735671997, |
|
"learning_rate": 9.678217821782178e-06, |
|
"loss": 2.178, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.03712871287128713, |
|
"grad_norm": 0.5500190258026123, |
|
"learning_rate": 9.653465346534654e-06, |
|
"loss": 2.0283, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.039603960396039604, |
|
"grad_norm": 0.5886285901069641, |
|
"learning_rate": 9.628712871287129e-06, |
|
"loss": 2.1614, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.04207920792079208, |
|
"grad_norm": 0.5357037782669067, |
|
"learning_rate": 9.603960396039604e-06, |
|
"loss": 2.0643, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.04455445544554455, |
|
"grad_norm": 0.5793443918228149, |
|
"learning_rate": 9.579207920792079e-06, |
|
"loss": 2.1523, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.04702970297029703, |
|
"grad_norm": 0.5294497609138489, |
|
"learning_rate": 9.554455445544555e-06, |
|
"loss": 2.064, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.04950495049504951, |
|
"grad_norm": 0.5384939908981323, |
|
"learning_rate": 9.52970297029703e-06, |
|
"loss": 2.1125, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.05198019801980198, |
|
"grad_norm": 0.5320760607719421, |
|
"learning_rate": 9.504950495049505e-06, |
|
"loss": 2.0449, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.054455445544554455, |
|
"grad_norm": 0.5393515229225159, |
|
"learning_rate": 9.480198019801981e-06, |
|
"loss": 2.068, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.05693069306930693, |
|
"grad_norm": 0.5507024526596069, |
|
"learning_rate": 9.455445544554455e-06, |
|
"loss": 2.1202, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.0594059405940594, |
|
"grad_norm": 0.5239824056625366, |
|
"learning_rate": 9.430693069306931e-06, |
|
"loss": 2.0453, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.06188118811881188, |
|
"grad_norm": 0.48455333709716797, |
|
"learning_rate": 9.405940594059405e-06, |
|
"loss": 1.96, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.06435643564356436, |
|
"grad_norm": 0.4918675422668457, |
|
"learning_rate": 9.381188118811881e-06, |
|
"loss": 1.9884, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.06683168316831684, |
|
"grad_norm": 0.4760024845600128, |
|
"learning_rate": 9.356435643564357e-06, |
|
"loss": 1.9526, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.06930693069306931, |
|
"grad_norm": 0.49026936292648315, |
|
"learning_rate": 9.331683168316833e-06, |
|
"loss": 1.9912, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.07178217821782178, |
|
"grad_norm": 0.4500166177749634, |
|
"learning_rate": 9.306930693069308e-06, |
|
"loss": 1.923, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.07425742574257425, |
|
"grad_norm": 0.5082471966743469, |
|
"learning_rate": 9.282178217821784e-06, |
|
"loss": 2.0664, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07673267326732673, |
|
"grad_norm": 0.46434321999549866, |
|
"learning_rate": 9.257425742574258e-06, |
|
"loss": 1.9853, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.07920792079207921, |
|
"grad_norm": 0.4646989703178406, |
|
"learning_rate": 9.232673267326734e-06, |
|
"loss": 1.933, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.08168316831683169, |
|
"grad_norm": 0.44078710675239563, |
|
"learning_rate": 9.20792079207921e-06, |
|
"loss": 1.9257, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.08415841584158416, |
|
"grad_norm": 0.43003711104393005, |
|
"learning_rate": 9.183168316831684e-06, |
|
"loss": 1.8782, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.08663366336633663, |
|
"grad_norm": 0.4398047626018524, |
|
"learning_rate": 9.15841584158416e-06, |
|
"loss": 1.8992, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.0891089108910891, |
|
"grad_norm": 0.454773873090744, |
|
"learning_rate": 9.133663366336634e-06, |
|
"loss": 1.9095, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.09158415841584158, |
|
"grad_norm": 0.4209723472595215, |
|
"learning_rate": 9.10891089108911e-06, |
|
"loss": 1.8878, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.09405940594059406, |
|
"grad_norm": 0.5108030438423157, |
|
"learning_rate": 9.084158415841585e-06, |
|
"loss": 1.9941, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.09653465346534654, |
|
"grad_norm": 0.3964357376098633, |
|
"learning_rate": 9.05940594059406e-06, |
|
"loss": 1.8688, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.09900990099009901, |
|
"grad_norm": 0.4510701298713684, |
|
"learning_rate": 9.034653465346535e-06, |
|
"loss": 1.9611, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.10148514851485149, |
|
"grad_norm": 0.41456782817840576, |
|
"learning_rate": 9.009900990099011e-06, |
|
"loss": 1.9325, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.10396039603960396, |
|
"grad_norm": 0.4038480520248413, |
|
"learning_rate": 8.985148514851487e-06, |
|
"loss": 1.8802, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.10643564356435643, |
|
"grad_norm": 0.3977126181125641, |
|
"learning_rate": 8.960396039603961e-06, |
|
"loss": 1.8851, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.10891089108910891, |
|
"grad_norm": 0.38500988483428955, |
|
"learning_rate": 8.935643564356437e-06, |
|
"loss": 1.865, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.11138613861386139, |
|
"grad_norm": 0.4324626326560974, |
|
"learning_rate": 8.910891089108911e-06, |
|
"loss": 1.9546, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.11386138613861387, |
|
"grad_norm": 0.3916115164756775, |
|
"learning_rate": 8.886138613861387e-06, |
|
"loss": 1.8974, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.11633663366336634, |
|
"grad_norm": 0.3723801374435425, |
|
"learning_rate": 8.861386138613862e-06, |
|
"loss": 1.8051, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.1188118811881188, |
|
"grad_norm": 0.37748202681541443, |
|
"learning_rate": 8.836633663366338e-06, |
|
"loss": 1.8079, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.12128712871287128, |
|
"grad_norm": 0.3759283125400543, |
|
"learning_rate": 8.811881188118812e-06, |
|
"loss": 1.7865, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.12376237623762376, |
|
"grad_norm": 0.4520368278026581, |
|
"learning_rate": 8.787128712871288e-06, |
|
"loss": 1.8109, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.12623762376237624, |
|
"grad_norm": 0.3660925030708313, |
|
"learning_rate": 8.762376237623764e-06, |
|
"loss": 1.7482, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.12871287128712872, |
|
"grad_norm": 0.37427181005477905, |
|
"learning_rate": 8.737623762376238e-06, |
|
"loss": 1.7701, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.1311881188118812, |
|
"grad_norm": 0.36493778228759766, |
|
"learning_rate": 8.712871287128714e-06, |
|
"loss": 1.7917, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.13366336633663367, |
|
"grad_norm": 0.3632214665412903, |
|
"learning_rate": 8.688118811881188e-06, |
|
"loss": 1.7502, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.13613861386138615, |
|
"grad_norm": 0.3864065408706665, |
|
"learning_rate": 8.663366336633664e-06, |
|
"loss": 1.8106, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.13861386138613863, |
|
"grad_norm": 0.379221647977829, |
|
"learning_rate": 8.638613861386139e-06, |
|
"loss": 1.7649, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.14108910891089108, |
|
"grad_norm": 0.3508760929107666, |
|
"learning_rate": 8.613861386138615e-06, |
|
"loss": 1.7476, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.14356435643564355, |
|
"grad_norm": 0.3341202139854431, |
|
"learning_rate": 8.58910891089109e-06, |
|
"loss": 1.7198, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.14603960396039603, |
|
"grad_norm": 0.3496810793876648, |
|
"learning_rate": 8.564356435643565e-06, |
|
"loss": 1.7532, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.1485148514851485, |
|
"grad_norm": 0.3431270122528076, |
|
"learning_rate": 8.53960396039604e-06, |
|
"loss": 1.6886, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.15099009900990099, |
|
"grad_norm": 0.35412609577178955, |
|
"learning_rate": 8.514851485148515e-06, |
|
"loss": 1.7241, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.15346534653465346, |
|
"grad_norm": 0.34784942865371704, |
|
"learning_rate": 8.490099009900991e-06, |
|
"loss": 1.7613, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.15594059405940594, |
|
"grad_norm": 0.43794113397598267, |
|
"learning_rate": 8.465346534653465e-06, |
|
"loss": 1.7395, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.15841584158415842, |
|
"grad_norm": 0.3304344117641449, |
|
"learning_rate": 8.440594059405941e-06, |
|
"loss": 1.7528, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.1608910891089109, |
|
"grad_norm": 0.32466626167297363, |
|
"learning_rate": 8.415841584158416e-06, |
|
"loss": 1.6578, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.16336633663366337, |
|
"grad_norm": 0.32768896222114563, |
|
"learning_rate": 8.391089108910891e-06, |
|
"loss": 1.6906, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.16584158415841585, |
|
"grad_norm": 0.34629130363464355, |
|
"learning_rate": 8.366336633663367e-06, |
|
"loss": 1.6983, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.16831683168316833, |
|
"grad_norm": 0.4086500108242035, |
|
"learning_rate": 8.341584158415842e-06, |
|
"loss": 1.7356, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.1707920792079208, |
|
"grad_norm": 0.34105509519577026, |
|
"learning_rate": 8.316831683168318e-06, |
|
"loss": 1.727, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.17326732673267325, |
|
"grad_norm": 0.3648548722267151, |
|
"learning_rate": 8.292079207920792e-06, |
|
"loss": 1.6912, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.17574257425742573, |
|
"grad_norm": 0.3171598017215729, |
|
"learning_rate": 8.267326732673268e-06, |
|
"loss": 1.6563, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.1782178217821782, |
|
"grad_norm": 0.32731616497039795, |
|
"learning_rate": 8.242574257425742e-06, |
|
"loss": 1.64, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.1806930693069307, |
|
"grad_norm": 0.34876877069473267, |
|
"learning_rate": 8.217821782178218e-06, |
|
"loss": 1.766, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.18316831683168316, |
|
"grad_norm": 0.34440743923187256, |
|
"learning_rate": 8.193069306930692e-06, |
|
"loss": 1.7217, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.18564356435643564, |
|
"grad_norm": 0.3497994542121887, |
|
"learning_rate": 8.168316831683168e-06, |
|
"loss": 1.6761, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.18811881188118812, |
|
"grad_norm": 0.3406583368778229, |
|
"learning_rate": 8.143564356435644e-06, |
|
"loss": 1.6707, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.1905940594059406, |
|
"grad_norm": 0.32504573464393616, |
|
"learning_rate": 8.11881188118812e-06, |
|
"loss": 1.7381, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.19306930693069307, |
|
"grad_norm": 0.36358287930488586, |
|
"learning_rate": 8.094059405940595e-06, |
|
"loss": 1.705, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.19554455445544555, |
|
"grad_norm": 0.3335002660751343, |
|
"learning_rate": 8.06930693069307e-06, |
|
"loss": 1.6123, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.19801980198019803, |
|
"grad_norm": 0.32308968901634216, |
|
"learning_rate": 8.044554455445545e-06, |
|
"loss": 1.6454, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.2004950495049505, |
|
"grad_norm": 0.33724862337112427, |
|
"learning_rate": 8.019801980198021e-06, |
|
"loss": 1.6448, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.20297029702970298, |
|
"grad_norm": 0.34251338243484497, |
|
"learning_rate": 7.995049504950497e-06, |
|
"loss": 1.663, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.20544554455445543, |
|
"grad_norm": 0.3260180950164795, |
|
"learning_rate": 7.970297029702971e-06, |
|
"loss": 1.6753, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.2079207920792079, |
|
"grad_norm": 0.344461053609848, |
|
"learning_rate": 7.945544554455447e-06, |
|
"loss": 1.6731, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.2103960396039604, |
|
"grad_norm": 0.3209852874279022, |
|
"learning_rate": 7.920792079207921e-06, |
|
"loss": 1.6023, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.21287128712871287, |
|
"grad_norm": 0.31528371572494507, |
|
"learning_rate": 7.896039603960397e-06, |
|
"loss": 1.676, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.21534653465346534, |
|
"grad_norm": 0.3156762719154358, |
|
"learning_rate": 7.871287128712872e-06, |
|
"loss": 1.6326, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.21782178217821782, |
|
"grad_norm": 0.3144882321357727, |
|
"learning_rate": 7.846534653465348e-06, |
|
"loss": 1.6144, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.2202970297029703, |
|
"grad_norm": 0.3290737569332123, |
|
"learning_rate": 7.821782178217822e-06, |
|
"loss": 1.6828, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.22277227722772278, |
|
"grad_norm": 0.33056607842445374, |
|
"learning_rate": 7.797029702970298e-06, |
|
"loss": 1.6539, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.22524752475247525, |
|
"grad_norm": 0.32440483570098877, |
|
"learning_rate": 7.772277227722774e-06, |
|
"loss": 1.5998, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.22772277227722773, |
|
"grad_norm": 0.4419662058353424, |
|
"learning_rate": 7.747524752475248e-06, |
|
"loss": 1.6359, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.2301980198019802, |
|
"grad_norm": 0.3224891722202301, |
|
"learning_rate": 7.722772277227724e-06, |
|
"loss": 1.6052, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.23267326732673269, |
|
"grad_norm": 0.4768555462360382, |
|
"learning_rate": 7.698019801980198e-06, |
|
"loss": 1.6429, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.23514851485148514, |
|
"grad_norm": 0.33938202261924744, |
|
"learning_rate": 7.673267326732674e-06, |
|
"loss": 1.6235, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.2376237623762376, |
|
"grad_norm": 0.3807191550731659, |
|
"learning_rate": 7.648514851485149e-06, |
|
"loss": 1.5357, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.2400990099009901, |
|
"grad_norm": 0.3358289301395416, |
|
"learning_rate": 7.6237623762376246e-06, |
|
"loss": 1.6076, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.24257425742574257, |
|
"grad_norm": 0.3264116048812866, |
|
"learning_rate": 7.5990099009901e-06, |
|
"loss": 1.6122, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.24504950495049505, |
|
"grad_norm": 0.31078216433525085, |
|
"learning_rate": 7.574257425742575e-06, |
|
"loss": 1.6074, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.24752475247524752, |
|
"grad_norm": 0.330763041973114, |
|
"learning_rate": 7.54950495049505e-06, |
|
"loss": 1.6214, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.3432685136795044, |
|
"learning_rate": 7.524752475247525e-06, |
|
"loss": 1.5724, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.2524752475247525, |
|
"grad_norm": 0.3254147171974182, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 1.6136, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.25495049504950495, |
|
"grad_norm": 0.313223272562027, |
|
"learning_rate": 7.475247524752476e-06, |
|
"loss": 1.5892, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.25742574257425743, |
|
"grad_norm": 0.32973653078079224, |
|
"learning_rate": 7.450495049504951e-06, |
|
"loss": 1.5771, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.2599009900990099, |
|
"grad_norm": 0.36007925868034363, |
|
"learning_rate": 7.425742574257426e-06, |
|
"loss": 1.5775, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.2623762376237624, |
|
"grad_norm": 0.37326544523239136, |
|
"learning_rate": 7.4009900990099015e-06, |
|
"loss": 1.618, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.26485148514851486, |
|
"grad_norm": 0.3342721164226532, |
|
"learning_rate": 7.376237623762377e-06, |
|
"loss": 1.6054, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.26732673267326734, |
|
"grad_norm": 0.3356787860393524, |
|
"learning_rate": 7.351485148514852e-06, |
|
"loss": 1.5623, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.2698019801980198, |
|
"grad_norm": 0.3222273290157318, |
|
"learning_rate": 7.326732673267327e-06, |
|
"loss": 1.5933, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.2722772277227723, |
|
"grad_norm": 0.3217301368713379, |
|
"learning_rate": 7.301980198019802e-06, |
|
"loss": 1.5872, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.2747524752475248, |
|
"grad_norm": 0.3168449401855469, |
|
"learning_rate": 7.277227722772278e-06, |
|
"loss": 1.5261, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.27722772277227725, |
|
"grad_norm": 0.3279650807380676, |
|
"learning_rate": 7.252475247524753e-06, |
|
"loss": 1.6002, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.27970297029702973, |
|
"grad_norm": 0.3291718363761902, |
|
"learning_rate": 7.227722772277228e-06, |
|
"loss": 1.6183, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.28217821782178215, |
|
"grad_norm": 0.3178790509700775, |
|
"learning_rate": 7.202970297029703e-06, |
|
"loss": 1.6182, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.28465346534653463, |
|
"grad_norm": 0.3220016360282898, |
|
"learning_rate": 7.1782178217821785e-06, |
|
"loss": 1.5684, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.2871287128712871, |
|
"grad_norm": 0.33079689741134644, |
|
"learning_rate": 7.153465346534654e-06, |
|
"loss": 1.5559, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.2896039603960396, |
|
"grad_norm": 0.43699851632118225, |
|
"learning_rate": 7.128712871287129e-06, |
|
"loss": 1.5457, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.29207920792079206, |
|
"grad_norm": 0.33569303154945374, |
|
"learning_rate": 7.103960396039604e-06, |
|
"loss": 1.5661, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.29455445544554454, |
|
"grad_norm": 0.36694973707199097, |
|
"learning_rate": 7.079207920792079e-06, |
|
"loss": 1.5669, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.297029702970297, |
|
"grad_norm": 0.3479726314544678, |
|
"learning_rate": 7.054455445544555e-06, |
|
"loss": 1.522, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.2995049504950495, |
|
"grad_norm": 0.29995083808898926, |
|
"learning_rate": 7.02970297029703e-06, |
|
"loss": 1.5492, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.30198019801980197, |
|
"grad_norm": 0.3200836777687073, |
|
"learning_rate": 7.004950495049505e-06, |
|
"loss": 1.5385, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.30445544554455445, |
|
"grad_norm": 0.28753870725631714, |
|
"learning_rate": 6.98019801980198e-06, |
|
"loss": 1.5467, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.3069306930693069, |
|
"grad_norm": 0.31007465720176697, |
|
"learning_rate": 6.9554455445544555e-06, |
|
"loss": 1.5118, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.3094059405940594, |
|
"grad_norm": 0.4157215356826782, |
|
"learning_rate": 6.930693069306931e-06, |
|
"loss": 1.5532, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.3118811881188119, |
|
"grad_norm": 0.2971484363079071, |
|
"learning_rate": 6.905940594059406e-06, |
|
"loss": 1.5584, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.31435643564356436, |
|
"grad_norm": 0.2835904061794281, |
|
"learning_rate": 6.881188118811881e-06, |
|
"loss": 1.5377, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.31683168316831684, |
|
"grad_norm": 0.30127376317977905, |
|
"learning_rate": 6.856435643564358e-06, |
|
"loss": 1.5746, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.3193069306930693, |
|
"grad_norm": 0.28873226046562195, |
|
"learning_rate": 6.831683168316833e-06, |
|
"loss": 1.5095, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.3217821782178218, |
|
"grad_norm": 0.30197909474372864, |
|
"learning_rate": 6.806930693069308e-06, |
|
"loss": 1.5331, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.32425742574257427, |
|
"grad_norm": 0.41716259717941284, |
|
"learning_rate": 6.782178217821783e-06, |
|
"loss": 1.5582, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.32673267326732675, |
|
"grad_norm": 0.3023141920566559, |
|
"learning_rate": 6.757425742574258e-06, |
|
"loss": 1.4948, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.3292079207920792, |
|
"grad_norm": 0.281654417514801, |
|
"learning_rate": 6.732673267326733e-06, |
|
"loss": 1.5389, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.3316831683168317, |
|
"grad_norm": 0.30336490273475647, |
|
"learning_rate": 6.707920792079209e-06, |
|
"loss": 1.5602, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.3341584158415842, |
|
"grad_norm": 0.38517317175865173, |
|
"learning_rate": 6.683168316831684e-06, |
|
"loss": 1.5381, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.33663366336633666, |
|
"grad_norm": 0.28759995102882385, |
|
"learning_rate": 6.6584158415841595e-06, |
|
"loss": 1.5295, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.33910891089108913, |
|
"grad_norm": 0.32320281863212585, |
|
"learning_rate": 6.633663366336635e-06, |
|
"loss": 1.5217, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.3415841584158416, |
|
"grad_norm": 0.30896422266960144, |
|
"learning_rate": 6.60891089108911e-06, |
|
"loss": 1.5082, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.34405940594059403, |
|
"grad_norm": 0.34041446447372437, |
|
"learning_rate": 6.584158415841585e-06, |
|
"loss": 1.5737, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.3465346534653465, |
|
"grad_norm": 0.2894056439399719, |
|
"learning_rate": 6.55940594059406e-06, |
|
"loss": 1.4919, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.349009900990099, |
|
"grad_norm": 0.30793777108192444, |
|
"learning_rate": 6.534653465346535e-06, |
|
"loss": 1.516, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.35148514851485146, |
|
"grad_norm": 0.28465503454208374, |
|
"learning_rate": 6.509900990099011e-06, |
|
"loss": 1.557, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.35396039603960394, |
|
"grad_norm": 0.3081417977809906, |
|
"learning_rate": 6.485148514851486e-06, |
|
"loss": 1.5123, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.3564356435643564, |
|
"grad_norm": 0.29695266485214233, |
|
"learning_rate": 6.460396039603961e-06, |
|
"loss": 1.5055, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.3589108910891089, |
|
"grad_norm": 0.2985694110393524, |
|
"learning_rate": 6.4356435643564364e-06, |
|
"loss": 1.5147, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.3613861386138614, |
|
"grad_norm": 0.28632259368896484, |
|
"learning_rate": 6.4108910891089116e-06, |
|
"loss": 1.471, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.36386138613861385, |
|
"grad_norm": 0.3022250235080719, |
|
"learning_rate": 6.386138613861387e-06, |
|
"loss": 1.5442, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.36633663366336633, |
|
"grad_norm": 0.3914463222026825, |
|
"learning_rate": 6.361386138613862e-06, |
|
"loss": 1.4734, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.3688118811881188, |
|
"grad_norm": 0.27985042333602905, |
|
"learning_rate": 6.336633663366337e-06, |
|
"loss": 1.5108, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.3712871287128713, |
|
"grad_norm": 0.30216488242149353, |
|
"learning_rate": 6.311881188118812e-06, |
|
"loss": 1.5021, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.37376237623762376, |
|
"grad_norm": 0.2911156415939331, |
|
"learning_rate": 6.287128712871288e-06, |
|
"loss": 1.5346, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.37623762376237624, |
|
"grad_norm": 0.31340810656547546, |
|
"learning_rate": 6.262376237623763e-06, |
|
"loss": 1.4989, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.3787128712871287, |
|
"grad_norm": 0.30087149143218994, |
|
"learning_rate": 6.237623762376238e-06, |
|
"loss": 1.4866, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.3811881188118812, |
|
"grad_norm": 0.2760373651981354, |
|
"learning_rate": 6.212871287128713e-06, |
|
"loss": 1.5085, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.38366336633663367, |
|
"grad_norm": 0.30896487832069397, |
|
"learning_rate": 6.1881188118811885e-06, |
|
"loss": 1.4165, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.38613861386138615, |
|
"grad_norm": 0.2805737555027008, |
|
"learning_rate": 6.163366336633664e-06, |
|
"loss": 1.4942, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.3886138613861386, |
|
"grad_norm": 0.28713473677635193, |
|
"learning_rate": 6.138613861386139e-06, |
|
"loss": 1.479, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.3910891089108911, |
|
"grad_norm": 0.3078054189682007, |
|
"learning_rate": 6.113861386138614e-06, |
|
"loss": 1.4805, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.3935643564356436, |
|
"grad_norm": 0.2805648148059845, |
|
"learning_rate": 6.08910891089109e-06, |
|
"loss": 1.4876, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.39603960396039606, |
|
"grad_norm": 0.312046080827713, |
|
"learning_rate": 6.064356435643565e-06, |
|
"loss": 1.5346, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.39851485148514854, |
|
"grad_norm": 0.31947430968284607, |
|
"learning_rate": 6.03960396039604e-06, |
|
"loss": 1.4913, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.400990099009901, |
|
"grad_norm": 0.3906289339065552, |
|
"learning_rate": 6.014851485148515e-06, |
|
"loss": 1.4746, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.4034653465346535, |
|
"grad_norm": 0.29878419637680054, |
|
"learning_rate": 5.99009900990099e-06, |
|
"loss": 1.4722, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.40594059405940597, |
|
"grad_norm": 0.27551355957984924, |
|
"learning_rate": 5.9653465346534655e-06, |
|
"loss": 1.4753, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.4084158415841584, |
|
"grad_norm": 0.30900660157203674, |
|
"learning_rate": 5.940594059405941e-06, |
|
"loss": 1.4817, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.41089108910891087, |
|
"grad_norm": 0.299325555562973, |
|
"learning_rate": 5.915841584158416e-06, |
|
"loss": 1.4887, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.41336633663366334, |
|
"grad_norm": 0.29113298654556274, |
|
"learning_rate": 5.891089108910891e-06, |
|
"loss": 1.4932, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.4158415841584158, |
|
"grad_norm": 0.3152018189430237, |
|
"learning_rate": 5.866336633663367e-06, |
|
"loss": 1.5383, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.4183168316831683, |
|
"grad_norm": 0.2825804650783539, |
|
"learning_rate": 5.841584158415842e-06, |
|
"loss": 1.4679, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.4207920792079208, |
|
"grad_norm": 0.2988182604312897, |
|
"learning_rate": 5.816831683168317e-06, |
|
"loss": 1.4793, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.42326732673267325, |
|
"grad_norm": 0.3168175220489502, |
|
"learning_rate": 5.792079207920792e-06, |
|
"loss": 1.4327, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.42574257425742573, |
|
"grad_norm": 0.31008175015449524, |
|
"learning_rate": 5.767326732673267e-06, |
|
"loss": 1.5326, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.4282178217821782, |
|
"grad_norm": 0.29081404209136963, |
|
"learning_rate": 5.7425742574257425e-06, |
|
"loss": 1.4325, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.4306930693069307, |
|
"grad_norm": 0.2973237931728363, |
|
"learning_rate": 5.717821782178218e-06, |
|
"loss": 1.4198, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.43316831683168316, |
|
"grad_norm": 0.286101758480072, |
|
"learning_rate": 5.693069306930693e-06, |
|
"loss": 1.493, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.43564356435643564, |
|
"grad_norm": 0.2858099043369293, |
|
"learning_rate": 5.668316831683169e-06, |
|
"loss": 1.4629, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.4381188118811881, |
|
"grad_norm": 0.3984195590019226, |
|
"learning_rate": 5.643564356435644e-06, |
|
"loss": 1.4704, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.4405940594059406, |
|
"grad_norm": 0.2942948043346405, |
|
"learning_rate": 5.61881188118812e-06, |
|
"loss": 1.4569, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.4430693069306931, |
|
"grad_norm": 0.2998722791671753, |
|
"learning_rate": 5.594059405940595e-06, |
|
"loss": 1.4956, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.44554455445544555, |
|
"grad_norm": 0.28324317932128906, |
|
"learning_rate": 5.56930693069307e-06, |
|
"loss": 1.4729, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.44801980198019803, |
|
"grad_norm": 0.4624726474285126, |
|
"learning_rate": 5.544554455445545e-06, |
|
"loss": 1.4922, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.4504950495049505, |
|
"grad_norm": 0.2969602346420288, |
|
"learning_rate": 5.519801980198021e-06, |
|
"loss": 1.445, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.452970297029703, |
|
"grad_norm": 0.3186553418636322, |
|
"learning_rate": 5.495049504950496e-06, |
|
"loss": 1.4408, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.45544554455445546, |
|
"grad_norm": 0.3078846335411072, |
|
"learning_rate": 5.470297029702971e-06, |
|
"loss": 1.5323, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.45792079207920794, |
|
"grad_norm": 0.3202517032623291, |
|
"learning_rate": 5.4455445544554465e-06, |
|
"loss": 1.4391, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.4603960396039604, |
|
"grad_norm": 0.3142746388912201, |
|
"learning_rate": 5.420792079207922e-06, |
|
"loss": 1.493, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.4628712871287129, |
|
"grad_norm": 0.33466094732284546, |
|
"learning_rate": 5.396039603960397e-06, |
|
"loss": 1.4116, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.46534653465346537, |
|
"grad_norm": 0.2982538342475891, |
|
"learning_rate": 5.371287128712872e-06, |
|
"loss": 1.4613, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.46782178217821785, |
|
"grad_norm": 0.31709668040275574, |
|
"learning_rate": 5.346534653465347e-06, |
|
"loss": 1.4492, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.47029702970297027, |
|
"grad_norm": 0.29457882046699524, |
|
"learning_rate": 5.321782178217822e-06, |
|
"loss": 1.4405, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.47277227722772275, |
|
"grad_norm": 0.37957248091697693, |
|
"learning_rate": 5.297029702970298e-06, |
|
"loss": 1.4378, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.4752475247524752, |
|
"grad_norm": 0.2980371415615082, |
|
"learning_rate": 5.272277227722773e-06, |
|
"loss": 1.4692, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.4777227722772277, |
|
"grad_norm": 0.28939124941825867, |
|
"learning_rate": 5.247524752475248e-06, |
|
"loss": 1.4377, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.4801980198019802, |
|
"grad_norm": 0.2956065535545349, |
|
"learning_rate": 5.2227722772277234e-06, |
|
"loss": 1.4463, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.48267326732673266, |
|
"grad_norm": 0.3002106249332428, |
|
"learning_rate": 5.1980198019801986e-06, |
|
"loss": 1.4742, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.48514851485148514, |
|
"grad_norm": 0.3060540556907654, |
|
"learning_rate": 5.173267326732674e-06, |
|
"loss": 1.4824, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.4876237623762376, |
|
"grad_norm": 0.3201966881752014, |
|
"learning_rate": 5.148514851485149e-06, |
|
"loss": 1.4513, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.4900990099009901, |
|
"grad_norm": 0.290448933839798, |
|
"learning_rate": 5.123762376237624e-06, |
|
"loss": 1.4568, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.49257425742574257, |
|
"grad_norm": 0.3016184866428375, |
|
"learning_rate": 5.0990099009901e-06, |
|
"loss": 1.4687, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.49504950495049505, |
|
"grad_norm": 0.29369238018989563, |
|
"learning_rate": 5.074257425742575e-06, |
|
"loss": 1.4348, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.4975247524752475, |
|
"grad_norm": 0.3146813213825226, |
|
"learning_rate": 5.04950495049505e-06, |
|
"loss": 1.4503, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.29575055837631226, |
|
"learning_rate": 5.024752475247525e-06, |
|
"loss": 1.4693, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.5024752475247525, |
|
"grad_norm": 0.31400489807128906, |
|
"learning_rate": 5e-06, |
|
"loss": 1.4796, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.504950495049505, |
|
"grad_norm": 0.3068208396434784, |
|
"learning_rate": 4.9752475247524755e-06, |
|
"loss": 1.4259, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.5074257425742574, |
|
"grad_norm": 0.3022879362106323, |
|
"learning_rate": 4.950495049504951e-06, |
|
"loss": 1.4409, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.5099009900990099, |
|
"grad_norm": 0.3629034459590912, |
|
"learning_rate": 4.925742574257426e-06, |
|
"loss": 1.4441, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.5123762376237624, |
|
"grad_norm": 0.2989904582500458, |
|
"learning_rate": 4.900990099009901e-06, |
|
"loss": 1.4463, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.5148514851485149, |
|
"grad_norm": 0.3044170141220093, |
|
"learning_rate": 4.876237623762377e-06, |
|
"loss": 1.411, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.5173267326732673, |
|
"grad_norm": 0.31803974509239197, |
|
"learning_rate": 4.851485148514852e-06, |
|
"loss": 1.4576, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.5198019801980198, |
|
"grad_norm": 0.31258466839790344, |
|
"learning_rate": 4.826732673267327e-06, |
|
"loss": 1.4368, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.5222772277227723, |
|
"grad_norm": 0.3041141629219055, |
|
"learning_rate": 4.801980198019802e-06, |
|
"loss": 1.4527, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.5247524752475248, |
|
"grad_norm": 0.3276950418949127, |
|
"learning_rate": 4.777227722772277e-06, |
|
"loss": 1.4323, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.5272277227722773, |
|
"grad_norm": 0.31132709980010986, |
|
"learning_rate": 4.7524752475247525e-06, |
|
"loss": 1.4398, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.5297029702970297, |
|
"grad_norm": 0.29284676909446716, |
|
"learning_rate": 4.727722772277228e-06, |
|
"loss": 1.4485, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.5321782178217822, |
|
"grad_norm": 0.30886998772621155, |
|
"learning_rate": 4.702970297029703e-06, |
|
"loss": 1.4251, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.5346534653465347, |
|
"grad_norm": 0.31258538365364075, |
|
"learning_rate": 4.678217821782179e-06, |
|
"loss": 1.413, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.5371287128712872, |
|
"grad_norm": 0.2904369533061981, |
|
"learning_rate": 4.653465346534654e-06, |
|
"loss": 1.45, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.5396039603960396, |
|
"grad_norm": 0.29757335782051086, |
|
"learning_rate": 4.628712871287129e-06, |
|
"loss": 1.4612, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.5420792079207921, |
|
"grad_norm": 0.33179768919944763, |
|
"learning_rate": 4.603960396039605e-06, |
|
"loss": 1.4399, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.5445544554455446, |
|
"grad_norm": 0.31457144021987915, |
|
"learning_rate": 4.57920792079208e-06, |
|
"loss": 1.4292, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.5470297029702971, |
|
"grad_norm": 0.3385794758796692, |
|
"learning_rate": 4.554455445544555e-06, |
|
"loss": 1.4999, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.5495049504950495, |
|
"grad_norm": 0.3007061779499054, |
|
"learning_rate": 4.52970297029703e-06, |
|
"loss": 1.4111, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.551980198019802, |
|
"grad_norm": 0.2860008776187897, |
|
"learning_rate": 4.5049504950495054e-06, |
|
"loss": 1.4152, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.5544554455445545, |
|
"grad_norm": 0.2954881489276886, |
|
"learning_rate": 4.4801980198019806e-06, |
|
"loss": 1.3589, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.556930693069307, |
|
"grad_norm": 0.37317365407943726, |
|
"learning_rate": 4.455445544554456e-06, |
|
"loss": 1.4971, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.5594059405940595, |
|
"grad_norm": 0.299736887216568, |
|
"learning_rate": 4.430693069306931e-06, |
|
"loss": 1.3905, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.5618811881188119, |
|
"grad_norm": 0.3162497282028198, |
|
"learning_rate": 4.405940594059406e-06, |
|
"loss": 1.4038, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.5643564356435643, |
|
"grad_norm": 0.29667121171951294, |
|
"learning_rate": 4.381188118811882e-06, |
|
"loss": 1.4628, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.5668316831683168, |
|
"grad_norm": 0.32302653789520264, |
|
"learning_rate": 4.356435643564357e-06, |
|
"loss": 1.3683, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.5693069306930693, |
|
"grad_norm": 0.321042537689209, |
|
"learning_rate": 4.331683168316832e-06, |
|
"loss": 1.4483, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5717821782178217, |
|
"grad_norm": 0.304511159658432, |
|
"learning_rate": 4.306930693069307e-06, |
|
"loss": 1.427, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.5742574257425742, |
|
"grad_norm": 0.3338593542575836, |
|
"learning_rate": 4.282178217821782e-06, |
|
"loss": 1.4192, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.5767326732673267, |
|
"grad_norm": 0.29887259006500244, |
|
"learning_rate": 4.2574257425742575e-06, |
|
"loss": 1.4413, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.5792079207920792, |
|
"grad_norm": 0.33678868412971497, |
|
"learning_rate": 4.232673267326733e-06, |
|
"loss": 1.4166, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.5816831683168316, |
|
"grad_norm": 0.3048015236854553, |
|
"learning_rate": 4.207920792079208e-06, |
|
"loss": 1.4265, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.5841584158415841, |
|
"grad_norm": 0.3110325336456299, |
|
"learning_rate": 4.183168316831684e-06, |
|
"loss": 1.4247, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.5866336633663366, |
|
"grad_norm": 0.3002597689628601, |
|
"learning_rate": 4.158415841584159e-06, |
|
"loss": 1.4474, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.5891089108910891, |
|
"grad_norm": 0.3254597783088684, |
|
"learning_rate": 4.133663366336634e-06, |
|
"loss": 1.459, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.5915841584158416, |
|
"grad_norm": 0.3132658004760742, |
|
"learning_rate": 4.108910891089109e-06, |
|
"loss": 1.3866, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.594059405940594, |
|
"grad_norm": 0.3283675014972687, |
|
"learning_rate": 4.084158415841584e-06, |
|
"loss": 1.4337, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.5965346534653465, |
|
"grad_norm": 0.3090082108974457, |
|
"learning_rate": 4.05940594059406e-06, |
|
"loss": 1.4339, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.599009900990099, |
|
"grad_norm": 0.32888296246528625, |
|
"learning_rate": 4.034653465346535e-06, |
|
"loss": 1.4231, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.6014851485148515, |
|
"grad_norm": 0.3375103175640106, |
|
"learning_rate": 4.0099009900990104e-06, |
|
"loss": 1.479, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.6039603960396039, |
|
"grad_norm": 0.30960792303085327, |
|
"learning_rate": 3.9851485148514856e-06, |
|
"loss": 1.4016, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.6064356435643564, |
|
"grad_norm": 0.30529141426086426, |
|
"learning_rate": 3.960396039603961e-06, |
|
"loss": 1.4043, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.6089108910891089, |
|
"grad_norm": 0.30488067865371704, |
|
"learning_rate": 3.935643564356436e-06, |
|
"loss": 1.447, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.6113861386138614, |
|
"grad_norm": 0.32256123423576355, |
|
"learning_rate": 3.910891089108911e-06, |
|
"loss": 1.4041, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.6138613861386139, |
|
"grad_norm": 0.3180142641067505, |
|
"learning_rate": 3.886138613861387e-06, |
|
"loss": 1.368, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.6163366336633663, |
|
"grad_norm": 0.289456844329834, |
|
"learning_rate": 3.861386138613862e-06, |
|
"loss": 1.4125, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.6188118811881188, |
|
"grad_norm": 0.3054012060165405, |
|
"learning_rate": 3.836633663366337e-06, |
|
"loss": 1.4223, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.6212871287128713, |
|
"grad_norm": 0.3069523274898529, |
|
"learning_rate": 3.8118811881188123e-06, |
|
"loss": 1.4255, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.6237623762376238, |
|
"grad_norm": 0.3207142651081085, |
|
"learning_rate": 3.7871287128712874e-06, |
|
"loss": 1.3785, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.6262376237623762, |
|
"grad_norm": 0.28611576557159424, |
|
"learning_rate": 3.7623762376237625e-06, |
|
"loss": 1.4157, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.6287128712871287, |
|
"grad_norm": 0.29343169927597046, |
|
"learning_rate": 3.737623762376238e-06, |
|
"loss": 1.395, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.6311881188118812, |
|
"grad_norm": 0.31444689631462097, |
|
"learning_rate": 3.712871287128713e-06, |
|
"loss": 1.4157, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.6336633663366337, |
|
"grad_norm": 0.30565670132637024, |
|
"learning_rate": 3.6881188118811883e-06, |
|
"loss": 1.4257, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.6361386138613861, |
|
"grad_norm": 0.29752224683761597, |
|
"learning_rate": 3.6633663366336635e-06, |
|
"loss": 1.4174, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.6386138613861386, |
|
"grad_norm": 0.3483918309211731, |
|
"learning_rate": 3.638613861386139e-06, |
|
"loss": 1.4271, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.6410891089108911, |
|
"grad_norm": 0.33828166127204895, |
|
"learning_rate": 3.613861386138614e-06, |
|
"loss": 1.4173, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.6435643564356436, |
|
"grad_norm": 0.3199135661125183, |
|
"learning_rate": 3.5891089108910892e-06, |
|
"loss": 1.4497, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.6460396039603961, |
|
"grad_norm": 0.35107582807540894, |
|
"learning_rate": 3.5643564356435644e-06, |
|
"loss": 1.4543, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.6485148514851485, |
|
"grad_norm": 0.2964717745780945, |
|
"learning_rate": 3.5396039603960395e-06, |
|
"loss": 1.3798, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.650990099009901, |
|
"grad_norm": 0.305396169424057, |
|
"learning_rate": 3.514851485148515e-06, |
|
"loss": 1.4083, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.6534653465346535, |
|
"grad_norm": 0.33443814516067505, |
|
"learning_rate": 3.49009900990099e-06, |
|
"loss": 1.3901, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.655940594059406, |
|
"grad_norm": 0.3325764834880829, |
|
"learning_rate": 3.4653465346534653e-06, |
|
"loss": 1.4028, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.6584158415841584, |
|
"grad_norm": 0.3248007595539093, |
|
"learning_rate": 3.4405940594059404e-06, |
|
"loss": 1.395, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.6608910891089109, |
|
"grad_norm": 0.326393187046051, |
|
"learning_rate": 3.4158415841584164e-06, |
|
"loss": 1.4245, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.6633663366336634, |
|
"grad_norm": 0.3411741852760315, |
|
"learning_rate": 3.3910891089108915e-06, |
|
"loss": 1.357, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.6658415841584159, |
|
"grad_norm": 0.2952353358268738, |
|
"learning_rate": 3.3663366336633666e-06, |
|
"loss": 1.4015, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.6683168316831684, |
|
"grad_norm": 0.29269590973854065, |
|
"learning_rate": 3.341584158415842e-06, |
|
"loss": 1.3943, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.6707920792079208, |
|
"grad_norm": 0.34936144948005676, |
|
"learning_rate": 3.3168316831683173e-06, |
|
"loss": 1.4249, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.6732673267326733, |
|
"grad_norm": 0.3835432529449463, |
|
"learning_rate": 3.2920792079207924e-06, |
|
"loss": 1.4073, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.6757425742574258, |
|
"grad_norm": 0.29864177107810974, |
|
"learning_rate": 3.2673267326732676e-06, |
|
"loss": 1.3666, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.6782178217821783, |
|
"grad_norm": 0.2940259575843811, |
|
"learning_rate": 3.242574257425743e-06, |
|
"loss": 1.395, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.6806930693069307, |
|
"grad_norm": 0.2962116301059723, |
|
"learning_rate": 3.2178217821782182e-06, |
|
"loss": 1.3633, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.6831683168316832, |
|
"grad_norm": 0.3066718876361847, |
|
"learning_rate": 3.1930693069306933e-06, |
|
"loss": 1.4122, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.6856435643564357, |
|
"grad_norm": 0.31843486428260803, |
|
"learning_rate": 3.1683168316831685e-06, |
|
"loss": 1.439, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.6881188118811881, |
|
"grad_norm": 0.30499327182769775, |
|
"learning_rate": 3.143564356435644e-06, |
|
"loss": 1.3989, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.6905940594059405, |
|
"grad_norm": 0.30208632349967957, |
|
"learning_rate": 3.118811881188119e-06, |
|
"loss": 1.4379, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.693069306930693, |
|
"grad_norm": 0.3020811080932617, |
|
"learning_rate": 3.0940594059405943e-06, |
|
"loss": 1.3705, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.6955445544554455, |
|
"grad_norm": 0.31347641348838806, |
|
"learning_rate": 3.0693069306930694e-06, |
|
"loss": 1.453, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.698019801980198, |
|
"grad_norm": 0.31867870688438416, |
|
"learning_rate": 3.044554455445545e-06, |
|
"loss": 1.3625, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.7004950495049505, |
|
"grad_norm": 0.3544825613498688, |
|
"learning_rate": 3.01980198019802e-06, |
|
"loss": 1.4193, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.7029702970297029, |
|
"grad_norm": 0.32157930731773376, |
|
"learning_rate": 2.995049504950495e-06, |
|
"loss": 1.4221, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.7054455445544554, |
|
"grad_norm": 0.3847821354866028, |
|
"learning_rate": 2.9702970297029703e-06, |
|
"loss": 1.4646, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.7079207920792079, |
|
"grad_norm": 0.31620582938194275, |
|
"learning_rate": 2.9455445544554454e-06, |
|
"loss": 1.3974, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.7103960396039604, |
|
"grad_norm": 0.3170306980609894, |
|
"learning_rate": 2.920792079207921e-06, |
|
"loss": 1.3649, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.7128712871287128, |
|
"grad_norm": 0.3135956823825836, |
|
"learning_rate": 2.896039603960396e-06, |
|
"loss": 1.4123, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.7153465346534653, |
|
"grad_norm": 0.33243054151535034, |
|
"learning_rate": 2.8712871287128712e-06, |
|
"loss": 1.439, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.7178217821782178, |
|
"grad_norm": 0.34006670117378235, |
|
"learning_rate": 2.8465346534653464e-06, |
|
"loss": 1.4413, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.7202970297029703, |
|
"grad_norm": 0.3046601414680481, |
|
"learning_rate": 2.821782178217822e-06, |
|
"loss": 1.3925, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.7227722772277227, |
|
"grad_norm": 0.3214465379714966, |
|
"learning_rate": 2.7970297029702974e-06, |
|
"loss": 1.4088, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.7252475247524752, |
|
"grad_norm": 0.3489651083946228, |
|
"learning_rate": 2.7722772277227726e-06, |
|
"loss": 1.3971, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.7277227722772277, |
|
"grad_norm": 0.30379924178123474, |
|
"learning_rate": 2.747524752475248e-06, |
|
"loss": 1.4208, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.7301980198019802, |
|
"grad_norm": 0.3187316060066223, |
|
"learning_rate": 2.7227722772277232e-06, |
|
"loss": 1.3674, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.7326732673267327, |
|
"grad_norm": 0.3093225657939911, |
|
"learning_rate": 2.6980198019801984e-06, |
|
"loss": 1.3971, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.7351485148514851, |
|
"grad_norm": 0.34428709745407104, |
|
"learning_rate": 2.6732673267326735e-06, |
|
"loss": 1.3967, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.7376237623762376, |
|
"grad_norm": 0.30280086398124695, |
|
"learning_rate": 2.648514851485149e-06, |
|
"loss": 1.4361, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.7400990099009901, |
|
"grad_norm": 0.30049508810043335, |
|
"learning_rate": 2.623762376237624e-06, |
|
"loss": 1.4157, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.7425742574257426, |
|
"grad_norm": 0.293844074010849, |
|
"learning_rate": 2.5990099009900993e-06, |
|
"loss": 1.4001, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.745049504950495, |
|
"grad_norm": 0.2977203130722046, |
|
"learning_rate": 2.5742574257425744e-06, |
|
"loss": 1.411, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.7475247524752475, |
|
"grad_norm": 0.30808597803115845, |
|
"learning_rate": 2.54950495049505e-06, |
|
"loss": 1.4009, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.36923572421073914, |
|
"learning_rate": 2.524752475247525e-06, |
|
"loss": 1.413, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.7524752475247525, |
|
"grad_norm": 0.30836039781570435, |
|
"learning_rate": 2.5e-06, |
|
"loss": 1.4145, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.754950495049505, |
|
"grad_norm": 0.3002849817276001, |
|
"learning_rate": 2.4752475247524753e-06, |
|
"loss": 1.4077, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.7574257425742574, |
|
"grad_norm": 0.31670308113098145, |
|
"learning_rate": 2.4504950495049505e-06, |
|
"loss": 1.3622, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.7599009900990099, |
|
"grad_norm": 0.32291552424430847, |
|
"learning_rate": 2.425742574257426e-06, |
|
"loss": 1.4209, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.7623762376237624, |
|
"grad_norm": 0.298605740070343, |
|
"learning_rate": 2.400990099009901e-06, |
|
"loss": 1.398, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.7648514851485149, |
|
"grad_norm": 0.32863232493400574, |
|
"learning_rate": 2.3762376237623762e-06, |
|
"loss": 1.3914, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.7673267326732673, |
|
"grad_norm": 0.30840128660202026, |
|
"learning_rate": 2.3514851485148514e-06, |
|
"loss": 1.3713, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.7698019801980198, |
|
"grad_norm": 0.29777318239212036, |
|
"learning_rate": 2.326732673267327e-06, |
|
"loss": 1.4103, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.7722772277227723, |
|
"grad_norm": 0.30536577105522156, |
|
"learning_rate": 2.3019801980198025e-06, |
|
"loss": 1.4202, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.7747524752475248, |
|
"grad_norm": 0.31859514117240906, |
|
"learning_rate": 2.2772277227722776e-06, |
|
"loss": 1.3604, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.7772277227722773, |
|
"grad_norm": 0.3363761007785797, |
|
"learning_rate": 2.2524752475247527e-06, |
|
"loss": 1.3867, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.7797029702970297, |
|
"grad_norm": 0.3162858784198761, |
|
"learning_rate": 2.227722772277228e-06, |
|
"loss": 1.4132, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.7821782178217822, |
|
"grad_norm": 0.3257865309715271, |
|
"learning_rate": 2.202970297029703e-06, |
|
"loss": 1.376, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.7846534653465347, |
|
"grad_norm": 0.30780932307243347, |
|
"learning_rate": 2.1782178217821785e-06, |
|
"loss": 1.3839, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.7871287128712872, |
|
"grad_norm": 0.31187111139297485, |
|
"learning_rate": 2.1534653465346536e-06, |
|
"loss": 1.3933, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.7896039603960396, |
|
"grad_norm": 0.30921033024787903, |
|
"learning_rate": 2.1287128712871288e-06, |
|
"loss": 1.4032, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.7920792079207921, |
|
"grad_norm": 0.30851835012435913, |
|
"learning_rate": 2.103960396039604e-06, |
|
"loss": 1.3603, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.7945544554455446, |
|
"grad_norm": 0.31143277883529663, |
|
"learning_rate": 2.0792079207920794e-06, |
|
"loss": 1.4173, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.7970297029702971, |
|
"grad_norm": 0.35872605443000793, |
|
"learning_rate": 2.0544554455445546e-06, |
|
"loss": 1.3677, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.7995049504950495, |
|
"grad_norm": 0.3374822437763214, |
|
"learning_rate": 2.02970297029703e-06, |
|
"loss": 1.3537, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.801980198019802, |
|
"grad_norm": 0.2937536835670471, |
|
"learning_rate": 2.0049504950495052e-06, |
|
"loss": 1.3737, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.8044554455445545, |
|
"grad_norm": 0.31206777691841125, |
|
"learning_rate": 1.9801980198019803e-06, |
|
"loss": 1.419, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.806930693069307, |
|
"grad_norm": 0.3164346218109131, |
|
"learning_rate": 1.9554455445544555e-06, |
|
"loss": 1.385, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.8094059405940595, |
|
"grad_norm": 0.33687278628349304, |
|
"learning_rate": 1.930693069306931e-06, |
|
"loss": 1.3889, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.8118811881188119, |
|
"grad_norm": 0.3687054216861725, |
|
"learning_rate": 1.9059405940594061e-06, |
|
"loss": 1.4176, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.8143564356435643, |
|
"grad_norm": 0.3122500777244568, |
|
"learning_rate": 1.8811881188118813e-06, |
|
"loss": 1.3692, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.8168316831683168, |
|
"grad_norm": 0.29484283924102783, |
|
"learning_rate": 1.8564356435643566e-06, |
|
"loss": 1.3515, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.8193069306930693, |
|
"grad_norm": 0.3318758010864258, |
|
"learning_rate": 1.8316831683168317e-06, |
|
"loss": 1.4136, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.8217821782178217, |
|
"grad_norm": 0.3211475610733032, |
|
"learning_rate": 1.806930693069307e-06, |
|
"loss": 1.3916, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.8242574257425742, |
|
"grad_norm": 0.30757591128349304, |
|
"learning_rate": 1.7821782178217822e-06, |
|
"loss": 1.3706, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.8267326732673267, |
|
"grad_norm": 0.34898415207862854, |
|
"learning_rate": 1.7574257425742575e-06, |
|
"loss": 1.3701, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.8292079207920792, |
|
"grad_norm": 0.2998105585575104, |
|
"learning_rate": 1.7326732673267326e-06, |
|
"loss": 1.3618, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.8316831683168316, |
|
"grad_norm": 0.2943211495876312, |
|
"learning_rate": 1.7079207920792082e-06, |
|
"loss": 1.4091, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.8341584158415841, |
|
"grad_norm": 0.3602202534675598, |
|
"learning_rate": 1.6831683168316833e-06, |
|
"loss": 1.4153, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.8366336633663366, |
|
"grad_norm": 0.29592642188072205, |
|
"learning_rate": 1.6584158415841587e-06, |
|
"loss": 1.3734, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.8391089108910891, |
|
"grad_norm": 0.29323434829711914, |
|
"learning_rate": 1.6336633663366338e-06, |
|
"loss": 1.388, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.8415841584158416, |
|
"grad_norm": 0.3228212594985962, |
|
"learning_rate": 1.6089108910891091e-06, |
|
"loss": 1.4158, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.844059405940594, |
|
"grad_norm": 0.3537634015083313, |
|
"learning_rate": 1.5841584158415842e-06, |
|
"loss": 1.4491, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.8465346534653465, |
|
"grad_norm": 0.3257499933242798, |
|
"learning_rate": 1.5594059405940596e-06, |
|
"loss": 1.3763, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.849009900990099, |
|
"grad_norm": 0.3112364709377289, |
|
"learning_rate": 1.5346534653465347e-06, |
|
"loss": 1.4097, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.8514851485148515, |
|
"grad_norm": 0.29781144857406616, |
|
"learning_rate": 1.50990099009901e-06, |
|
"loss": 1.3658, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.8539603960396039, |
|
"grad_norm": 0.3098454773426056, |
|
"learning_rate": 1.4851485148514852e-06, |
|
"loss": 1.3384, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.8564356435643564, |
|
"grad_norm": 0.3077802062034607, |
|
"learning_rate": 1.4603960396039605e-06, |
|
"loss": 1.3846, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.8589108910891089, |
|
"grad_norm": 0.3286285400390625, |
|
"learning_rate": 1.4356435643564356e-06, |
|
"loss": 1.3673, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.8613861386138614, |
|
"grad_norm": 0.3004399538040161, |
|
"learning_rate": 1.410891089108911e-06, |
|
"loss": 1.4044, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.8638613861386139, |
|
"grad_norm": 0.29809755086898804, |
|
"learning_rate": 1.3861386138613863e-06, |
|
"loss": 1.394, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.8663366336633663, |
|
"grad_norm": 0.34286266565322876, |
|
"learning_rate": 1.3613861386138616e-06, |
|
"loss": 1.3938, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.8688118811881188, |
|
"grad_norm": 0.3307294547557831, |
|
"learning_rate": 1.3366336633663367e-06, |
|
"loss": 1.3634, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.8712871287128713, |
|
"grad_norm": 0.3771167993545532, |
|
"learning_rate": 1.311881188118812e-06, |
|
"loss": 1.3944, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.8737623762376238, |
|
"grad_norm": 0.2910442352294922, |
|
"learning_rate": 1.2871287128712872e-06, |
|
"loss": 1.3502, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.8762376237623762, |
|
"grad_norm": 0.30816197395324707, |
|
"learning_rate": 1.2623762376237625e-06, |
|
"loss": 1.4092, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.8787128712871287, |
|
"grad_norm": 0.31113070249557495, |
|
"learning_rate": 1.2376237623762377e-06, |
|
"loss": 1.3655, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.8811881188118812, |
|
"grad_norm": 0.3404984474182129, |
|
"learning_rate": 1.212871287128713e-06, |
|
"loss": 1.3988, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.8836633663366337, |
|
"grad_norm": 0.318690687417984, |
|
"learning_rate": 1.1881188118811881e-06, |
|
"loss": 1.4033, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.8861386138613861, |
|
"grad_norm": 0.3162771165370941, |
|
"learning_rate": 1.1633663366336635e-06, |
|
"loss": 1.3273, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.8886138613861386, |
|
"grad_norm": 0.3195946514606476, |
|
"learning_rate": 1.1386138613861388e-06, |
|
"loss": 1.402, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.8910891089108911, |
|
"grad_norm": 0.3035919964313507, |
|
"learning_rate": 1.113861386138614e-06, |
|
"loss": 1.3771, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.8935643564356436, |
|
"grad_norm": 0.3509937524795532, |
|
"learning_rate": 1.0891089108910893e-06, |
|
"loss": 1.3768, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.8960396039603961, |
|
"grad_norm": 0.29600340127944946, |
|
"learning_rate": 1.0643564356435644e-06, |
|
"loss": 1.3833, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.8985148514851485, |
|
"grad_norm": 0.31444939970970154, |
|
"learning_rate": 1.0396039603960397e-06, |
|
"loss": 1.4297, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.900990099009901, |
|
"grad_norm": 0.31744831800460815, |
|
"learning_rate": 1.014851485148515e-06, |
|
"loss": 1.3888, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.9034653465346535, |
|
"grad_norm": 0.30338016152381897, |
|
"learning_rate": 9.900990099009902e-07, |
|
"loss": 1.399, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.905940594059406, |
|
"grad_norm": 0.35813769698143005, |
|
"learning_rate": 9.653465346534655e-07, |
|
"loss": 1.4256, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.9084158415841584, |
|
"grad_norm": 0.2930937707424164, |
|
"learning_rate": 9.405940594059406e-07, |
|
"loss": 1.3911, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.9108910891089109, |
|
"grad_norm": 0.307483971118927, |
|
"learning_rate": 9.158415841584159e-07, |
|
"loss": 1.3864, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.9133663366336634, |
|
"grad_norm": 0.31432434916496277, |
|
"learning_rate": 8.910891089108911e-07, |
|
"loss": 1.3408, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.9158415841584159, |
|
"grad_norm": 0.38564974069595337, |
|
"learning_rate": 8.663366336633663e-07, |
|
"loss": 1.4199, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.9183168316831684, |
|
"grad_norm": 0.3292260468006134, |
|
"learning_rate": 8.415841584158417e-07, |
|
"loss": 1.3685, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.9207920792079208, |
|
"grad_norm": 0.466034859418869, |
|
"learning_rate": 8.168316831683169e-07, |
|
"loss": 1.353, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.9232673267326733, |
|
"grad_norm": 0.4319489896297455, |
|
"learning_rate": 7.920792079207921e-07, |
|
"loss": 1.4056, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.9257425742574258, |
|
"grad_norm": 0.3164903521537781, |
|
"learning_rate": 7.673267326732673e-07, |
|
"loss": 1.38, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.9282178217821783, |
|
"grad_norm": 0.30668383836746216, |
|
"learning_rate": 7.425742574257426e-07, |
|
"loss": 1.4189, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.9306930693069307, |
|
"grad_norm": 0.3274458646774292, |
|
"learning_rate": 7.178217821782178e-07, |
|
"loss": 1.4107, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.9331683168316832, |
|
"grad_norm": 0.3504544496536255, |
|
"learning_rate": 6.930693069306931e-07, |
|
"loss": 1.3985, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.9356435643564357, |
|
"grad_norm": 0.30881020426750183, |
|
"learning_rate": 6.683168316831684e-07, |
|
"loss": 1.3907, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.9381188118811881, |
|
"grad_norm": 0.31699880957603455, |
|
"learning_rate": 6.435643564356436e-07, |
|
"loss": 1.391, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.9405940594059405, |
|
"grad_norm": 0.4385339021682739, |
|
"learning_rate": 6.188118811881188e-07, |
|
"loss": 1.3167, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.943069306930693, |
|
"grad_norm": 0.30269816517829895, |
|
"learning_rate": 5.940594059405941e-07, |
|
"loss": 1.3883, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.9455445544554455, |
|
"grad_norm": 0.3730785846710205, |
|
"learning_rate": 5.693069306930694e-07, |
|
"loss": 1.3889, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.948019801980198, |
|
"grad_norm": 0.2997334599494934, |
|
"learning_rate": 5.445544554455446e-07, |
|
"loss": 1.3676, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.9504950495049505, |
|
"grad_norm": 0.31710949540138245, |
|
"learning_rate": 5.198019801980199e-07, |
|
"loss": 1.3599, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.9529702970297029, |
|
"grad_norm": 0.31283149123191833, |
|
"learning_rate": 4.950495049504951e-07, |
|
"loss": 1.4165, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.9554455445544554, |
|
"grad_norm": 0.2956486642360687, |
|
"learning_rate": 4.702970297029703e-07, |
|
"loss": 1.3856, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.9579207920792079, |
|
"grad_norm": 0.3077276051044464, |
|
"learning_rate": 4.4554455445544555e-07, |
|
"loss": 1.3522, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.9603960396039604, |
|
"grad_norm": 0.3776938021183014, |
|
"learning_rate": 4.2079207920792083e-07, |
|
"loss": 1.3514, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.9628712871287128, |
|
"grad_norm": 0.2987917959690094, |
|
"learning_rate": 3.9603960396039606e-07, |
|
"loss": 1.3761, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.9653465346534653, |
|
"grad_norm": 0.28841686248779297, |
|
"learning_rate": 3.712871287128713e-07, |
|
"loss": 1.4033, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.9678217821782178, |
|
"grad_norm": 0.3757478594779968, |
|
"learning_rate": 3.4653465346534657e-07, |
|
"loss": 1.3696, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.9702970297029703, |
|
"grad_norm": 0.3242747187614441, |
|
"learning_rate": 3.217821782178218e-07, |
|
"loss": 1.3525, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.9727722772277227, |
|
"grad_norm": 0.3164178431034088, |
|
"learning_rate": 2.9702970297029703e-07, |
|
"loss": 1.4161, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.9752475247524752, |
|
"grad_norm": 0.29471248388290405, |
|
"learning_rate": 2.722772277227723e-07, |
|
"loss": 1.39, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.9777227722772277, |
|
"grad_norm": 0.3437105417251587, |
|
"learning_rate": 2.4752475247524754e-07, |
|
"loss": 1.4015, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.9801980198019802, |
|
"grad_norm": 0.3044927716255188, |
|
"learning_rate": 2.2277227722772277e-07, |
|
"loss": 1.3551, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.9826732673267327, |
|
"grad_norm": 0.3083963990211487, |
|
"learning_rate": 1.9801980198019803e-07, |
|
"loss": 1.3673, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.9851485148514851, |
|
"grad_norm": 0.35897475481033325, |
|
"learning_rate": 1.7326732673267329e-07, |
|
"loss": 1.3399, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.9876237623762376, |
|
"grad_norm": 0.28752899169921875, |
|
"learning_rate": 1.4851485148514852e-07, |
|
"loss": 1.3829, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.9900990099009901, |
|
"grad_norm": 0.29315048456192017, |
|
"learning_rate": 1.2376237623762377e-07, |
|
"loss": 1.4124, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.9925742574257426, |
|
"grad_norm": 0.29938578605651855, |
|
"learning_rate": 9.900990099009901e-08, |
|
"loss": 1.3581, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.995049504950495, |
|
"grad_norm": 0.30865898728370667, |
|
"learning_rate": 7.425742574257426e-08, |
|
"loss": 1.3965, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.9975247524752475, |
|
"grad_norm": 0.32963815331459045, |
|
"learning_rate": 4.950495049504951e-08, |
|
"loss": 1.3963, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.3176136612892151, |
|
"learning_rate": 2.4752475247524754e-08, |
|
"loss": 1.3705, |
|
"step": 404 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 404, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 0, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.5634749540204544e+16, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|