{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 2376,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.008417508417508417,
      "grad_norm": NaN,
      "learning_rate": 4.990179573512907e-05,
      "loss": 3.4797,
      "step": 10
    },
    {
      "epoch": 0.016835016835016835,
      "grad_norm": 1.8723673820495605,
      "learning_rate": 4.97615039281706e-05,
      "loss": 3.4589,
      "step": 20
    },
    {
      "epoch": 0.025252525252525252,
      "grad_norm": 1.6116461753845215,
      "learning_rate": 4.962121212121213e-05,
      "loss": 3.3366,
      "step": 30
    },
    {
      "epoch": 0.03367003367003367,
      "grad_norm": 1.6241545677185059,
      "learning_rate": 4.9480920314253646e-05,
      "loss": 3.0591,
      "step": 40
    },
    {
      "epoch": 0.04208754208754209,
      "grad_norm": 1.6978979110717773,
      "learning_rate": 4.934062850729518e-05,
      "loss": 3.2064,
      "step": 50
    },
    {
      "epoch": 0.050505050505050504,
      "grad_norm": 2.229762077331543,
      "learning_rate": 4.92003367003367e-05,
      "loss": 3.0569,
      "step": 60
    },
    {
      "epoch": 0.058922558922558925,
      "grad_norm": 1.3693021535873413,
      "learning_rate": 4.906004489337823e-05,
      "loss": 2.9715,
      "step": 70
    },
    {
      "epoch": 0.06734006734006734,
      "grad_norm": 1.712484359741211,
      "learning_rate": 4.891975308641975e-05,
      "loss": 2.9787,
      "step": 80
    },
    {
      "epoch": 0.07575757575757576,
      "grad_norm": 1.933789610862732,
      "learning_rate": 4.877946127946128e-05,
      "loss": 2.9016,
      "step": 90
    },
    {
      "epoch": 0.08417508417508418,
      "grad_norm": 2.0508992671966553,
      "learning_rate": 4.863916947250281e-05,
      "loss": 2.93,
      "step": 100
    },
    {
      "epoch": 0.09259259259259259,
      "grad_norm": 1.8941344022750854,
      "learning_rate": 4.8498877665544335e-05,
      "loss": 2.852,
      "step": 110
    },
    {
      "epoch": 0.10101010101010101,
      "grad_norm": 1.5971471071243286,
      "learning_rate": 4.835858585858586e-05,
      "loss": 2.8484,
      "step": 120
    },
    {
      "epoch": 0.10942760942760943,
      "grad_norm": 1.8011534214019775,
      "learning_rate": 4.8218294051627386e-05,
      "loss": 2.8801,
      "step": 130
    },
    {
      "epoch": 0.11784511784511785,
      "grad_norm": 2.071338176727295,
      "learning_rate": 4.807800224466891e-05,
      "loss": 2.7344,
      "step": 140
    },
    {
      "epoch": 0.12626262626262627,
      "grad_norm": 1.5685261487960815,
      "learning_rate": 4.793771043771044e-05,
      "loss": 2.8886,
      "step": 150
    },
    {
      "epoch": 0.13468013468013468,
      "grad_norm": 1.719056487083435,
      "learning_rate": 4.779741863075197e-05,
      "loss": 2.7326,
      "step": 160
    },
    {
      "epoch": 0.14309764309764308,
      "grad_norm": 1.8598765134811401,
      "learning_rate": 4.765712682379349e-05,
      "loss": 2.7544,
      "step": 170
    },
    {
      "epoch": 0.15151515151515152,
      "grad_norm": 1.7602612972259521,
      "learning_rate": 4.751683501683502e-05,
      "loss": 2.7759,
      "step": 180
    },
    {
      "epoch": 0.15993265993265993,
      "grad_norm": 2.0498409271240234,
      "learning_rate": 4.7376543209876543e-05,
      "loss": 2.7495,
      "step": 190
    },
    {
      "epoch": 0.16835016835016836,
      "grad_norm": 1.8029284477233887,
      "learning_rate": 4.723625140291807e-05,
      "loss": 2.6568,
      "step": 200
    },
    {
      "epoch": 0.17676767676767677,
      "grad_norm": 2.052203416824341,
      "learning_rate": 4.70959595959596e-05,
      "loss": 2.6162,
      "step": 210
    },
    {
      "epoch": 0.18518518518518517,
      "grad_norm": 1.693206787109375,
      "learning_rate": 4.6955667789001126e-05,
      "loss": 2.7206,
      "step": 220
    },
    {
      "epoch": 0.1936026936026936,
      "grad_norm": 1.7983921766281128,
      "learning_rate": 4.681537598204265e-05,
      "loss": 2.699,
      "step": 230
    },
    {
      "epoch": 0.20202020202020202,
      "grad_norm": 2.1269702911376953,
      "learning_rate": 4.6675084175084176e-05,
      "loss": 2.8926,
      "step": 240
    },
    {
      "epoch": 0.21043771043771045,
      "grad_norm": 2.1926703453063965,
      "learning_rate": 4.65347923681257e-05,
      "loss": 2.6839,
      "step": 250
    },
    {
      "epoch": 0.21885521885521886,
      "grad_norm": 1.9844324588775635,
      "learning_rate": 4.639450056116723e-05,
      "loss": 2.6166,
      "step": 260
    },
    {
      "epoch": 0.22727272727272727,
      "grad_norm": 2.1632473468780518,
      "learning_rate": 4.625420875420876e-05,
      "loss": 2.6975,
      "step": 270
    },
    {
      "epoch": 0.2356902356902357,
      "grad_norm": 1.8100026845932007,
      "learning_rate": 4.6113916947250283e-05,
      "loss": 2.7679,
      "step": 280
    },
    {
      "epoch": 0.2441077441077441,
      "grad_norm": 1.765960693359375,
      "learning_rate": 4.597362514029181e-05,
      "loss": 2.6372,
      "step": 290
    },
    {
      "epoch": 0.25252525252525254,
      "grad_norm": 2.0554540157318115,
      "learning_rate": 4.5833333333333334e-05,
      "loss": 2.707,
      "step": 300
    },
    {
      "epoch": 0.2609427609427609,
      "grad_norm": 1.662891149520874,
      "learning_rate": 4.5693041526374866e-05,
      "loss": 2.5671,
      "step": 310
    },
    {
      "epoch": 0.26936026936026936,
      "grad_norm": 1.9311376810073853,
      "learning_rate": 4.555274971941639e-05,
      "loss": 2.6173,
      "step": 320
    },
    {
      "epoch": 0.2777777777777778,
      "grad_norm": 2.077493667602539,
      "learning_rate": 4.541245791245791e-05,
      "loss": 2.7469,
      "step": 330
    },
    {
      "epoch": 0.28619528619528617,
      "grad_norm": 1.8855682611465454,
      "learning_rate": 4.527216610549944e-05,
      "loss": 2.6762,
      "step": 340
    },
    {
      "epoch": 0.2946127946127946,
      "grad_norm": 3.0554871559143066,
      "learning_rate": 4.5131874298540966e-05,
      "loss": 2.6689,
      "step": 350
    },
    {
      "epoch": 0.30303030303030304,
      "grad_norm": 2.175938129425049,
      "learning_rate": 4.49915824915825e-05,
      "loss": 2.5579,
      "step": 360
    },
    {
      "epoch": 0.3114478114478115,
      "grad_norm": 2.187106132507324,
      "learning_rate": 4.485129068462402e-05,
      "loss": 2.6166,
      "step": 370
    },
    {
      "epoch": 0.31986531986531985,
      "grad_norm": 2.1227610111236572,
      "learning_rate": 4.471099887766554e-05,
      "loss": 2.6637,
      "step": 380
    },
    {
      "epoch": 0.3282828282828283,
      "grad_norm": 2.3033812046051025,
      "learning_rate": 4.4570707070707074e-05,
      "loss": 2.6204,
      "step": 390
    },
    {
      "epoch": 0.3367003367003367,
      "grad_norm": 2.34395432472229,
      "learning_rate": 4.44304152637486e-05,
      "loss": 2.6941,
      "step": 400
    },
    {
      "epoch": 0.3451178451178451,
      "grad_norm": 1.7339164018630981,
      "learning_rate": 4.429012345679013e-05,
      "loss": 2.5516,
      "step": 410
    },
    {
      "epoch": 0.35353535353535354,
      "grad_norm": 2.3480944633483887,
      "learning_rate": 4.414983164983165e-05,
      "loss": 2.5533,
      "step": 420
    },
    {
      "epoch": 0.36195286195286197,
      "grad_norm": 2.076554298400879,
      "learning_rate": 4.4009539842873175e-05,
      "loss": 2.5942,
      "step": 430
    },
    {
      "epoch": 0.37037037037037035,
      "grad_norm": 2.461144208908081,
      "learning_rate": 4.3869248035914707e-05,
      "loss": 2.7073,
      "step": 440
    },
    {
      "epoch": 0.3787878787878788,
      "grad_norm": 2.3409440517425537,
      "learning_rate": 4.372895622895623e-05,
      "loss": 2.551,
      "step": 450
    },
    {
      "epoch": 0.3872053872053872,
      "grad_norm": 2.0938796997070312,
      "learning_rate": 4.358866442199776e-05,
      "loss": 2.5157,
      "step": 460
    },
    {
      "epoch": 0.3956228956228956,
      "grad_norm": 2.0075490474700928,
      "learning_rate": 4.344837261503928e-05,
      "loss": 2.6791,
      "step": 470
    },
    {
      "epoch": 0.40404040404040403,
      "grad_norm": 2.0483977794647217,
      "learning_rate": 4.330808080808081e-05,
      "loss": 2.5285,
      "step": 480
    },
    {
      "epoch": 0.41245791245791247,
      "grad_norm": 2.0210118293762207,
      "learning_rate": 4.316778900112234e-05,
      "loss": 2.4894,
      "step": 490
    },
    {
      "epoch": 0.4208754208754209,
      "grad_norm": 2.2782247066497803,
      "learning_rate": 4.3027497194163864e-05,
      "loss": 2.541,
      "step": 500
    },
    {
      "epoch": 0.4292929292929293,
      "grad_norm": 2.6043848991394043,
      "learning_rate": 4.288720538720539e-05,
      "loss": 2.5162,
      "step": 510
    },
    {
      "epoch": 0.4377104377104377,
      "grad_norm": 2.8767223358154297,
      "learning_rate": 4.2746913580246915e-05,
      "loss": 2.5854,
      "step": 520
    },
    {
      "epoch": 0.44612794612794615,
      "grad_norm": 2.8873238563537598,
      "learning_rate": 4.260662177328844e-05,
      "loss": 2.6396,
      "step": 530
    },
    {
      "epoch": 0.45454545454545453,
      "grad_norm": 2.052957057952881,
      "learning_rate": 4.246632996632997e-05,
      "loss": 2.4754,
      "step": 540
    },
    {
      "epoch": 0.46296296296296297,
      "grad_norm": 2.027665376663208,
      "learning_rate": 4.23260381593715e-05,
      "loss": 2.4765,
      "step": 550
    },
    {
      "epoch": 0.4713804713804714,
      "grad_norm": 2.368720293045044,
      "learning_rate": 4.218574635241302e-05,
      "loss": 2.4542,
      "step": 560
    },
    {
      "epoch": 0.4797979797979798,
      "grad_norm": 2.33085298538208,
      "learning_rate": 4.204545454545455e-05,
      "loss": 2.4622,
      "step": 570
    },
    {
      "epoch": 0.4882154882154882,
      "grad_norm": 2.237562417984009,
      "learning_rate": 4.190516273849607e-05,
      "loss": 2.5542,
      "step": 580
    },
    {
      "epoch": 0.49663299663299665,
      "grad_norm": 2.0441579818725586,
      "learning_rate": 4.17648709315376e-05,
      "loss": 2.4717,
      "step": 590
    },
    {
      "epoch": 0.5050505050505051,
      "grad_norm": 2.248406171798706,
      "learning_rate": 4.162457912457913e-05,
      "loss": 2.4661,
      "step": 600
    },
    {
      "epoch": 0.5134680134680135,
      "grad_norm": 2.1775243282318115,
      "learning_rate": 4.1484287317620655e-05,
      "loss": 2.5614,
      "step": 610
    },
    {
      "epoch": 0.5218855218855218,
      "grad_norm": 2.319425344467163,
      "learning_rate": 4.134399551066218e-05,
      "loss": 2.5012,
      "step": 620
    },
    {
      "epoch": 0.5303030303030303,
      "grad_norm": 2.232248544692993,
      "learning_rate": 4.1203703703703705e-05,
      "loss": 2.5689,
      "step": 630
    },
    {
      "epoch": 0.5387205387205387,
      "grad_norm": 2.3449838161468506,
      "learning_rate": 4.106341189674523e-05,
      "loss": 2.4287,
      "step": 640
    },
    {
      "epoch": 0.5471380471380471,
      "grad_norm": 2.412785768508911,
      "learning_rate": 4.092312008978676e-05,
      "loss": 2.5041,
      "step": 650
    },
    {
      "epoch": 0.5555555555555556,
      "grad_norm": 2.432955503463745,
      "learning_rate": 4.078282828282828e-05,
      "loss": 2.4476,
      "step": 660
    },
    {
      "epoch": 0.563973063973064,
      "grad_norm": 2.642298698425293,
      "learning_rate": 4.064253647586981e-05,
      "loss": 2.421,
      "step": 670
    },
    {
      "epoch": 0.5723905723905723,
      "grad_norm": 2.232794761657715,
      "learning_rate": 4.050224466891134e-05,
      "loss": 2.5912,
      "step": 680
    },
    {
      "epoch": 0.5808080808080808,
      "grad_norm": 2.8323323726654053,
      "learning_rate": 4.036195286195286e-05,
      "loss": 2.3986,
      "step": 690
    },
    {
      "epoch": 0.5892255892255892,
      "grad_norm": 4.2033891677856445,
      "learning_rate": 4.0221661054994395e-05,
      "loss": 2.5413,
      "step": 700
    },
    {
      "epoch": 0.5976430976430976,
      "grad_norm": 2.3608999252319336,
      "learning_rate": 4.008136924803591e-05,
      "loss": 2.5453,
      "step": 710
    },
    {
      "epoch": 0.6060606060606061,
      "grad_norm": 2.5277252197265625,
      "learning_rate": 3.9941077441077445e-05,
      "loss": 2.4956,
      "step": 720
    },
    {
      "epoch": 0.6144781144781145,
      "grad_norm": 2.5772793292999268,
      "learning_rate": 3.980078563411897e-05,
      "loss": 2.5238,
      "step": 730
    },
    {
      "epoch": 0.622895622895623,
      "grad_norm": 2.718047618865967,
      "learning_rate": 3.9660493827160496e-05,
      "loss": 2.4327,
      "step": 740
    },
    {
      "epoch": 0.6313131313131313,
      "grad_norm": 2.5964958667755127,
      "learning_rate": 3.952020202020202e-05,
      "loss": 2.5208,
      "step": 750
    },
    {
      "epoch": 0.6397306397306397,
      "grad_norm": 2.4600930213928223,
      "learning_rate": 3.9379910213243546e-05,
      "loss": 2.6661,
      "step": 760
    },
    {
      "epoch": 0.6481481481481481,
      "grad_norm": 2.449575662612915,
      "learning_rate": 3.923961840628507e-05,
      "loss": 2.5442,
      "step": 770
    },
    {
      "epoch": 0.6565656565656566,
      "grad_norm": 2.3219118118286133,
      "learning_rate": 3.90993265993266e-05,
      "loss": 2.3805,
      "step": 780
    },
    {
      "epoch": 0.664983164983165,
      "grad_norm": 2.774872064590454,
      "learning_rate": 3.895903479236813e-05,
      "loss": 2.4343,
      "step": 790
    },
    {
      "epoch": 0.6734006734006734,
      "grad_norm": 2.466688632965088,
      "learning_rate": 3.881874298540965e-05,
      "loss": 2.4088,
      "step": 800
    },
    {
      "epoch": 0.6818181818181818,
      "grad_norm": 3.1128170490264893,
      "learning_rate": 3.867845117845118e-05,
      "loss": 2.5768,
      "step": 810
    },
    {
      "epoch": 0.6902356902356902,
      "grad_norm": 4.245325088500977,
      "learning_rate": 3.8538159371492704e-05,
      "loss": 2.3917,
      "step": 820
    },
    {
      "epoch": 0.6986531986531986,
      "grad_norm": 2.6386542320251465,
      "learning_rate": 3.8397867564534236e-05,
      "loss": 2.3603,
      "step": 830
    },
    {
      "epoch": 0.7070707070707071,
      "grad_norm": 3.0349717140197754,
      "learning_rate": 3.825757575757576e-05,
      "loss": 2.5057,
      "step": 840
    },
    {
      "epoch": 0.7154882154882155,
      "grad_norm": 2.524481773376465,
      "learning_rate": 3.8117283950617286e-05,
      "loss": 2.5321,
      "step": 850
    },
    {
      "epoch": 0.7239057239057239,
      "grad_norm": 3.5061841011047363,
      "learning_rate": 3.797699214365881e-05,
      "loss": 2.6322,
      "step": 860
    },
    {
      "epoch": 0.7323232323232324,
      "grad_norm": 2.5284855365753174,
      "learning_rate": 3.7836700336700336e-05,
      "loss": 2.5965,
      "step": 870
    },
    {
      "epoch": 0.7407407407407407,
      "grad_norm": 2.923661231994629,
      "learning_rate": 3.769640852974187e-05,
      "loss": 2.4343,
      "step": 880
    },
    {
      "epoch": 0.7491582491582491,
      "grad_norm": 2.788609743118286,
      "learning_rate": 3.7556116722783393e-05,
      "loss": 2.4594,
      "step": 890
    },
    {
      "epoch": 0.7575757575757576,
      "grad_norm": 2.8142261505126953,
      "learning_rate": 3.741582491582492e-05,
      "loss": 2.4316,
      "step": 900
    },
    {
      "epoch": 0.765993265993266,
      "grad_norm": 2.7626090049743652,
      "learning_rate": 3.7275533108866444e-05,
      "loss": 2.454,
      "step": 910
    },
    {
      "epoch": 0.7744107744107744,
      "grad_norm": 2.700515031814575,
      "learning_rate": 3.713524130190797e-05,
      "loss": 2.5807,
      "step": 920
    },
    {
      "epoch": 0.7828282828282829,
      "grad_norm": 2.9090945720672607,
      "learning_rate": 3.69949494949495e-05,
      "loss": 2.461,
      "step": 930
    },
    {
      "epoch": 0.7912457912457912,
      "grad_norm": 2.439922332763672,
      "learning_rate": 3.6854657687991026e-05,
      "loss": 2.454,
      "step": 940
    },
    {
      "epoch": 0.7996632996632996,
      "grad_norm": 2.9203062057495117,
      "learning_rate": 3.6714365881032544e-05,
      "loss": 2.4361,
      "step": 950
    },
    {
      "epoch": 0.8080808080808081,
      "grad_norm": 2.6928439140319824,
      "learning_rate": 3.6574074074074076e-05,
      "loss": 2.4572,
      "step": 960
    },
    {
      "epoch": 0.8164983164983165,
      "grad_norm": 2.66032338142395,
      "learning_rate": 3.64337822671156e-05,
      "loss": 2.4882,
      "step": 970
    },
    {
      "epoch": 0.8249158249158249,
      "grad_norm": 2.4645493030548096,
      "learning_rate": 3.6293490460157134e-05,
      "loss": 2.4432,
      "step": 980
    },
    {
      "epoch": 0.8333333333333334,
      "grad_norm": 2.7508137226104736,
      "learning_rate": 3.615319865319866e-05,
      "loss": 2.3677,
      "step": 990
    },
    {
      "epoch": 0.8417508417508418,
      "grad_norm": 3.1264898777008057,
      "learning_rate": 3.601290684624018e-05,
      "loss": 2.444,
      "step": 1000
    },
    {
      "epoch": 0.8501683501683501,
      "grad_norm": 2.5735745429992676,
      "learning_rate": 3.587261503928171e-05,
      "loss": 2.4514,
      "step": 1010
    },
    {
      "epoch": 0.8585858585858586,
      "grad_norm": 2.614163637161255,
      "learning_rate": 3.5732323232323234e-05,
      "loss": 2.4012,
      "step": 1020
    },
    {
      "epoch": 0.867003367003367,
      "grad_norm": 3.058293342590332,
      "learning_rate": 3.559203142536476e-05,
      "loss": 2.5346,
      "step": 1030
    },
    {
      "epoch": 0.8754208754208754,
      "grad_norm": 2.299180507659912,
      "learning_rate": 3.5451739618406285e-05,
      "loss": 2.4102,
      "step": 1040
    },
    {
      "epoch": 0.8838383838383839,
      "grad_norm": 2.3540875911712646,
      "learning_rate": 3.531144781144781e-05,
      "loss": 2.3884,
      "step": 1050
    },
    {
      "epoch": 0.8922558922558923,
      "grad_norm": 2.4985461235046387,
      "learning_rate": 3.517115600448934e-05,
      "loss": 2.5071,
      "step": 1060
    },
    {
      "epoch": 0.9006734006734006,
      "grad_norm": 2.615652322769165,
      "learning_rate": 3.503086419753087e-05,
      "loss": 2.4177,
      "step": 1070
    },
    {
      "epoch": 0.9090909090909091,
      "grad_norm": 2.617262601852417,
      "learning_rate": 3.489057239057239e-05,
      "loss": 2.3434,
      "step": 1080
    },
    {
      "epoch": 0.9175084175084175,
      "grad_norm": 2.422133445739746,
      "learning_rate": 3.475028058361392e-05,
      "loss": 2.4621,
      "step": 1090
    },
    {
      "epoch": 0.9259259259259259,
      "grad_norm": 2.65753173828125,
      "learning_rate": 3.460998877665544e-05,
      "loss": 2.398,
      "step": 1100
    },
    {
      "epoch": 0.9343434343434344,
      "grad_norm": 2.8853819370269775,
      "learning_rate": 3.4469696969696974e-05,
      "loss": 2.5412,
      "step": 1110
    },
    {
      "epoch": 0.9427609427609428,
      "grad_norm": 2.287621021270752,
      "learning_rate": 3.43294051627385e-05,
      "loss": 2.3842,
      "step": 1120
    },
    {
      "epoch": 0.9511784511784511,
      "grad_norm": 2.8708698749542236,
      "learning_rate": 3.4189113355780025e-05,
      "loss": 2.365,
      "step": 1130
    },
    {
      "epoch": 0.9595959595959596,
      "grad_norm": 2.9824109077453613,
      "learning_rate": 3.404882154882155e-05,
      "loss": 2.5138,
      "step": 1140
    },
    {
      "epoch": 0.968013468013468,
      "grad_norm": 3.031399726867676,
      "learning_rate": 3.3908529741863075e-05,
      "loss": 2.4326,
      "step": 1150
    },
    {
      "epoch": 0.9764309764309764,
      "grad_norm": 2.725060224533081,
      "learning_rate": 3.37682379349046e-05,
      "loss": 2.3802,
      "step": 1160
    },
    {
      "epoch": 0.9848484848484849,
      "grad_norm": 2.43306303024292,
      "learning_rate": 3.362794612794613e-05,
      "loss": 2.4778,
      "step": 1170
    },
    {
      "epoch": 0.9932659932659933,
      "grad_norm": 2.6997814178466797,
      "learning_rate": 3.348765432098766e-05,
      "loss": 2.4728,
      "step": 1180
    },
    {
      "epoch": 1.0016835016835017,
      "grad_norm": 2.8203701972961426,
      "learning_rate": 3.334736251402918e-05,
      "loss": 2.4017,
      "step": 1190
    },
    {
      "epoch": 1.0101010101010102,
      "grad_norm": 2.8343305587768555,
      "learning_rate": 3.320707070707071e-05,
      "loss": 2.3664,
      "step": 1200
    },
    {
      "epoch": 1.0185185185185186,
      "grad_norm": 3.814268112182617,
      "learning_rate": 3.306677890011223e-05,
      "loss": 2.5748,
      "step": 1210
    },
    {
      "epoch": 1.026936026936027,
      "grad_norm": 2.555053234100342,
      "learning_rate": 3.2926487093153765e-05,
      "loss": 2.4044,
      "step": 1220
    },
    {
      "epoch": 1.0353535353535352,
      "grad_norm": 3.142622470855713,
      "learning_rate": 3.278619528619529e-05,
      "loss": 2.3966,
      "step": 1230
    },
    {
      "epoch": 1.0437710437710437,
      "grad_norm": 2.609618663787842,
      "learning_rate": 3.2645903479236815e-05,
      "loss": 2.556,
      "step": 1240
    },
    {
      "epoch": 1.0521885521885521,
      "grad_norm": 3.1092679500579834,
      "learning_rate": 3.250561167227834e-05,
      "loss": 2.2779,
      "step": 1250
    },
    {
      "epoch": 1.0606060606060606,
      "grad_norm": 3.010402202606201,
      "learning_rate": 3.2365319865319865e-05,
      "loss": 2.3634,
      "step": 1260
    },
    {
      "epoch": 1.069023569023569,
      "grad_norm": 2.553971290588379,
      "learning_rate": 3.22250280583614e-05,
      "loss": 2.3588,
      "step": 1270
    },
    {
      "epoch": 1.0774410774410774,
      "grad_norm": 2.712143898010254,
      "learning_rate": 3.208473625140292e-05,
      "loss": 2.3728,
      "step": 1280
    },
    {
      "epoch": 1.0858585858585859,
      "grad_norm": 2.4108312129974365,
      "learning_rate": 3.194444444444444e-05,
      "loss": 2.2615,
      "step": 1290
    },
    {
      "epoch": 1.0942760942760943,
      "grad_norm": 2.677093267440796,
      "learning_rate": 3.180415263748597e-05,
      "loss": 2.267,
      "step": 1300
    },
    {
      "epoch": 1.1026936026936027,
      "grad_norm": 2.9490318298339844,
      "learning_rate": 3.16638608305275e-05,
      "loss": 2.6672,
      "step": 1310
    },
    {
      "epoch": 1.1111111111111112,
      "grad_norm": 2.6774485111236572,
      "learning_rate": 3.152356902356903e-05,
      "loss": 2.4183,
      "step": 1320
    },
    {
      "epoch": 1.1195286195286196,
      "grad_norm": 3.1193931102752686,
      "learning_rate": 3.138327721661055e-05,
      "loss": 2.4276,
      "step": 1330
    },
    {
      "epoch": 1.127946127946128,
      "grad_norm": 2.6562869548797607,
      "learning_rate": 3.1242985409652074e-05,
      "loss": 2.4551,
      "step": 1340
    },
    {
      "epoch": 1.1363636363636362,
      "grad_norm": 2.88474440574646,
      "learning_rate": 3.1102693602693605e-05,
      "loss": 2.3054,
      "step": 1350
    },
    {
      "epoch": 1.144781144781145,
      "grad_norm": 2.933032989501953,
      "learning_rate": 3.096240179573513e-05,
      "loss": 2.4734,
      "step": 1360
    },
    {
      "epoch": 1.1531986531986531,
      "grad_norm": 2.922410488128662,
      "learning_rate": 3.082210998877666e-05,
      "loss": 2.499,
      "step": 1370
    },
    {
      "epoch": 1.1616161616161615,
      "grad_norm": 2.990966320037842,
      "learning_rate": 3.068181818181818e-05,
      "loss": 2.4067,
      "step": 1380
    },
    {
      "epoch": 1.17003367003367,
      "grad_norm": 2.89037823677063,
      "learning_rate": 3.0541526374859706e-05,
      "loss": 2.3944,
      "step": 1390
    },
    {
      "epoch": 1.1784511784511784,
      "grad_norm": 2.6384222507476807,
      "learning_rate": 3.0401234567901238e-05,
      "loss": 2.2478,
      "step": 1400
    },
    {
      "epoch": 1.1868686868686869,
      "grad_norm": 3.4815266132354736,
      "learning_rate": 3.0260942760942763e-05,
      "loss": 2.3356,
      "step": 1410
    },
    {
      "epoch": 1.1952861952861953,
      "grad_norm": 3.02565336227417,
      "learning_rate": 3.0120650953984285e-05,
      "loss": 2.4739,
      "step": 1420
    },
    {
      "epoch": 1.2037037037037037,
      "grad_norm": 2.780487060546875,
      "learning_rate": 2.9980359147025817e-05,
      "loss": 2.3009,
      "step": 1430
    },
    {
      "epoch": 1.2121212121212122,
      "grad_norm": 3.0833890438079834,
      "learning_rate": 2.984006734006734e-05,
      "loss": 2.4,
      "step": 1440
    },
    {
      "epoch": 1.2205387205387206,
      "grad_norm": 2.707517623901367,
      "learning_rate": 2.969977553310887e-05,
      "loss": 2.4067,
      "step": 1450
    },
    {
      "epoch": 1.228956228956229,
      "grad_norm": 2.5139541625976562,
      "learning_rate": 2.9559483726150396e-05,
      "loss": 2.3598,
      "step": 1460
    },
    {
      "epoch": 1.2373737373737375,
      "grad_norm": 3.169325590133667,
      "learning_rate": 2.9419191919191918e-05,
      "loss": 2.4147,
      "step": 1470
    },
    {
      "epoch": 1.2457912457912457,
      "grad_norm": 2.7292799949645996,
      "learning_rate": 2.927890011223345e-05,
      "loss": 2.4204,
      "step": 1480
    },
    {
      "epoch": 1.2542087542087543,
      "grad_norm": 2.791738271713257,
      "learning_rate": 2.913860830527497e-05,
      "loss": 2.4267,
      "step": 1490
    },
    {
      "epoch": 1.2626262626262625,
      "grad_norm": 2.608445882797241,
      "learning_rate": 2.8998316498316503e-05,
      "loss": 2.3114,
      "step": 1500
    },
    {
      "epoch": 1.271043771043771,
      "grad_norm": 2.5103442668914795,
      "learning_rate": 2.8858024691358025e-05,
      "loss": 2.2693,
      "step": 1510
    },
    {
      "epoch": 1.2794612794612794,
      "grad_norm": 2.824044942855835,
      "learning_rate": 2.871773288439955e-05,
      "loss": 2.394,
      "step": 1520
    },
    {
      "epoch": 1.2878787878787878,
      "grad_norm": 3.275538444519043,
      "learning_rate": 2.8577441077441082e-05,
      "loss": 2.5109,
      "step": 1530
    },
    {
      "epoch": 1.2962962962962963,
      "grad_norm": 3.2262158393859863,
      "learning_rate": 2.8437149270482604e-05,
      "loss": 2.3438,
      "step": 1540
    },
    {
      "epoch": 1.3047138047138047,
      "grad_norm": 2.8264598846435547,
      "learning_rate": 2.829685746352413e-05,
      "loss": 2.4934,
      "step": 1550
    },
    {
      "epoch": 1.3131313131313131,
      "grad_norm": 2.9306719303131104,
      "learning_rate": 2.8156565656565658e-05,
      "loss": 2.2246,
      "step": 1560
    },
    {
      "epoch": 1.3215488215488216,
      "grad_norm": 2.6973257064819336,
      "learning_rate": 2.8016273849607183e-05,
      "loss": 2.3807,
      "step": 1570
    },
    {
      "epoch": 1.32996632996633,
      "grad_norm": 3.0139241218566895,
      "learning_rate": 2.787598204264871e-05,
      "loss": 2.5024,
      "step": 1580
    },
    {
      "epoch": 1.3383838383838385,
      "grad_norm": 2.5364763736724854,
      "learning_rate": 2.7735690235690237e-05,
      "loss": 2.2749,
      "step": 1590
    },
    {
      "epoch": 1.3468013468013469,
      "grad_norm": 2.8624165058135986,
      "learning_rate": 2.7595398428731762e-05,
      "loss": 2.3387,
      "step": 1600
    },
    {
      "epoch": 1.355218855218855,
      "grad_norm": 2.9810757637023926,
      "learning_rate": 2.745510662177329e-05,
      "loss": 2.1873,
      "step": 1610
    },
    {
      "epoch": 1.3636363636363638,
      "grad_norm": 2.5730276107788086,
      "learning_rate": 2.7314814814814816e-05,
      "loss": 2.273,
      "step": 1620
    },
    {
      "epoch": 1.372053872053872,
      "grad_norm": 3.2745449542999268,
      "learning_rate": 2.7174523007856344e-05,
      "loss": 2.2767,
      "step": 1630
    },
    {
      "epoch": 1.3804713804713804,
      "grad_norm": 2.857235908508301,
      "learning_rate": 2.703423120089787e-05,
      "loss": 2.4409,
      "step": 1640
    },
    {
      "epoch": 1.3888888888888888,
      "grad_norm": 2.7736656665802,
      "learning_rate": 2.6893939393939394e-05,
      "loss": 2.3483,
      "step": 1650
    },
    {
      "epoch": 1.3973063973063973,
      "grad_norm": 3.5529961585998535,
      "learning_rate": 2.6753647586980923e-05,
      "loss": 2.4341,
      "step": 1660
    },
    {
      "epoch": 1.4057239057239057,
      "grad_norm": 2.9840221405029297,
      "learning_rate": 2.6613355780022448e-05,
      "loss": 2.3764,
      "step": 1670
    },
    {
      "epoch": 1.4141414141414141,
      "grad_norm": 3.015062093734741,
      "learning_rate": 2.6473063973063973e-05,
      "loss": 2.3547,
      "step": 1680
    },
    {
      "epoch": 1.4225589225589226,
      "grad_norm": 3.440065860748291,
      "learning_rate": 2.6332772166105502e-05,
      "loss": 2.3314,
      "step": 1690
    },
    {
      "epoch": 1.430976430976431,
      "grad_norm": 3.125835657119751,
      "learning_rate": 2.6192480359147027e-05,
      "loss": 2.3632,
      "step": 1700
    },
    {
      "epoch": 1.4393939393939394,
      "grad_norm": 2.9020016193389893,
      "learning_rate": 2.6052188552188556e-05,
      "loss": 2.4821,
      "step": 1710
    },
    {
      "epoch": 1.4478114478114479,
      "grad_norm": 3.136042594909668,
      "learning_rate": 2.591189674523008e-05,
      "loss": 2.4106,
      "step": 1720
    },
    {
      "epoch": 1.4562289562289563,
      "grad_norm": 2.7179207801818848,
      "learning_rate": 2.5771604938271603e-05,
      "loss": 2.3794,
      "step": 1730
    },
    {
      "epoch": 1.4646464646464645,
      "grad_norm": 2.6184420585632324,
      "learning_rate": 2.5631313131313135e-05,
      "loss": 2.3573,
      "step": 1740
    },
    {
      "epoch": 1.4730639730639732,
      "grad_norm": 2.7196948528289795,
      "learning_rate": 2.549102132435466e-05,
      "loss": 2.4751,
      "step": 1750
    },
    {
      "epoch": 1.4814814814814814,
      "grad_norm": 3.360743999481201,
      "learning_rate": 2.5350729517396188e-05,
      "loss": 2.4172,
      "step": 1760
    },
    {
      "epoch": 1.4898989898989898,
      "grad_norm": 3.908057689666748,
      "learning_rate": 2.5210437710437713e-05,
      "loss": 2.3338,
      "step": 1770
    },
    {
      "epoch": 1.4983164983164983,
      "grad_norm": 3.2845561504364014,
      "learning_rate": 2.5070145903479235e-05,
      "loss": 2.4202,
      "step": 1780
    },
    {
      "epoch": 1.5067340067340067,
      "grad_norm": 2.720625638961792,
      "learning_rate": 2.4929854096520764e-05,
      "loss": 2.5369,
      "step": 1790
    },
    {
      "epoch": 1.5151515151515151,
      "grad_norm": 2.977970600128174,
      "learning_rate": 2.478956228956229e-05,
      "loss": 2.2776,
      "step": 1800
    },
    {
      "epoch": 1.5235690235690236,
      "grad_norm": 2.835860252380371,
      "learning_rate": 2.4649270482603818e-05,
      "loss": 2.329,
      "step": 1810
    },
    {
      "epoch": 1.531986531986532,
      "grad_norm": 3.039226531982422,
      "learning_rate": 2.4508978675645343e-05,
      "loss": 2.4787,
      "step": 1820
    },
    {
      "epoch": 1.5404040404040404,
      "grad_norm": 2.7011232376098633,
      "learning_rate": 2.4368686868686868e-05,
      "loss": 2.4924,
      "step": 1830
    },
    {
      "epoch": 1.5488215488215489,
      "grad_norm": 2.741283893585205,
      "learning_rate": 2.4228395061728396e-05,
      "loss": 2.4265,
      "step": 1840
    },
    {
      "epoch": 1.557239057239057,
      "grad_norm": 2.7896602153778076,
      "learning_rate": 2.408810325476992e-05,
      "loss": 2.2275,
      "step": 1850
    },
    {
      "epoch": 1.5656565656565657,
      "grad_norm": 3.505488872528076,
      "learning_rate": 2.394781144781145e-05,
      "loss": 2.4043,
      "step": 1860
    },
    {
      "epoch": 1.574074074074074,
      "grad_norm": 2.921954393386841,
      "learning_rate": 2.3807519640852975e-05,
      "loss": 2.3513,
      "step": 1870
    },
    {
      "epoch": 1.5824915824915826,
      "grad_norm": 3.427619695663452,
      "learning_rate": 2.36672278338945e-05,
      "loss": 2.3622,
      "step": 1880
    },
    {
      "epoch": 1.5909090909090908,
      "grad_norm": 3.277251720428467,
      "learning_rate": 2.352693602693603e-05,
      "loss": 2.3942,
      "step": 1890
    },
    {
      "epoch": 1.5993265993265995,
      "grad_norm": 3.206590175628662,
      "learning_rate": 2.3386644219977554e-05,
      "loss": 2.4197,
      "step": 1900
    },
    {
      "epoch": 1.6077441077441077,
      "grad_norm": 3.070993423461914,
      "learning_rate": 2.3246352413019083e-05,
      "loss": 2.3346,
      "step": 1910
    },
    {
      "epoch": 1.6161616161616161,
      "grad_norm": 2.9139037132263184,
      "learning_rate": 2.3106060606060605e-05,
      "loss": 2.2375,
      "step": 1920
    },
    {
      "epoch": 1.6245791245791246,
      "grad_norm": 3.173426628112793,
      "learning_rate": 2.2965768799102133e-05,
      "loss": 2.3717,
      "step": 1930
    },
    {
      "epoch": 1.632996632996633,
      "grad_norm": 2.910637855529785,
      "learning_rate": 2.282547699214366e-05,
      "loss": 2.28,
      "step": 1940
    },
    {
      "epoch": 1.6414141414141414,
      "grad_norm": 3.881056785583496,
      "learning_rate": 2.2685185185185187e-05,
      "loss": 2.3383,
      "step": 1950
    },
    {
      "epoch": 1.6498316498316499,
      "grad_norm": 3.2495148181915283,
      "learning_rate": 2.2544893378226712e-05,
      "loss": 2.3704,
      "step": 1960
    },
    {
      "epoch": 1.6582491582491583,
      "grad_norm": 2.6956305503845215,
      "learning_rate": 2.2404601571268237e-05,
      "loss": 2.3639,
      "step": 1970
    },
    {
      "epoch": 1.6666666666666665,
      "grad_norm": 3.1080222129821777,
      "learning_rate": 2.2264309764309766e-05,
      "loss": 2.4179,
      "step": 1980
    },
    {
      "epoch": 1.6750841750841752,
      "grad_norm": 2.9564356803894043,
      "learning_rate": 2.212401795735129e-05,
      "loss": 2.3974,
      "step": 1990
    },
    {
      "epoch": 1.6835016835016834,
      "grad_norm": 2.73732590675354,
      "learning_rate": 2.198372615039282e-05,
      "loss": 2.2624,
      "step": 2000
    },
    {
      "epoch": 1.691919191919192,
      "grad_norm": 2.9970414638519287,
      "learning_rate": 2.1843434343434345e-05,
      "loss": 2.3246,
      "step": 2010
    },
    {
      "epoch": 1.7003367003367003,
      "grad_norm": 3.410376787185669,
      "learning_rate": 2.170314253647587e-05,
      "loss": 2.3357,
      "step": 2020
    },
    {
      "epoch": 1.708754208754209,
      "grad_norm": 2.965650796890259,
      "learning_rate": 2.15628507295174e-05,
      "loss": 2.3428,
      "step": 2030
    },
    {
      "epoch": 1.7171717171717171,
      "grad_norm": 3.167226791381836,
      "learning_rate": 2.1422558922558924e-05,
      "loss": 2.1527,
      "step": 2040
    },
    {
      "epoch": 1.7255892255892256,
      "grad_norm": 4.487528324127197,
      "learning_rate": 2.128226711560045e-05,
      "loss": 2.4263,
      "step": 2050
    },
    {
      "epoch": 1.734006734006734,
      "grad_norm": 3.309645414352417,
      "learning_rate": 2.1141975308641977e-05,
      "loss": 2.313,
      "step": 2060
    },
    {
      "epoch": 1.7424242424242424,
      "grad_norm": 3.7654778957366943,
      "learning_rate": 2.1001683501683502e-05,
      "loss": 2.3591,
      "step": 2070
    },
    {
      "epoch": 1.7508417508417509,
      "grad_norm": 3.188965320587158,
      "learning_rate": 2.086139169472503e-05,
      "loss": 2.4313,
      "step": 2080
    },
    {
      "epoch": 1.7592592592592593,
      "grad_norm": 3.119352102279663,
      "learning_rate": 2.0721099887766553e-05,
      "loss": 2.3364,
      "step": 2090
    },
    {
      "epoch": 1.7676767676767677,
      "grad_norm": 3.435917854309082,
      "learning_rate": 2.058080808080808e-05,
      "loss": 2.4577,
      "step": 2100
    },
    {
      "epoch": 1.776094276094276,
      "grad_norm": 3.3111941814422607,
      "learning_rate": 2.0440516273849607e-05,
      "loss": 2.3218,
      "step": 2110
    },
    {
      "epoch": 1.7845117845117846,
      "grad_norm": 4.261884689331055,
      "learning_rate": 2.0300224466891135e-05,
      "loss": 2.4014,
      "step": 2120
    },
    {
      "epoch": 1.7929292929292928,
      "grad_norm": 2.9653213024139404,
      "learning_rate": 2.0159932659932664e-05,
      "loss": 2.309,
      "step": 2130
    },
    {
      "epoch": 1.8013468013468015,
      "grad_norm": 2.784932851791382,
      "learning_rate": 2.0019640852974185e-05,
      "loss": 2.3611,
      "step": 2140
    },
    {
      "epoch": 1.8097643097643097,
      "grad_norm": 3.5843029022216797,
      "learning_rate": 1.9879349046015714e-05,
      "loss": 2.4837,
      "step": 2150
    },
    {
      "epoch": 1.8181818181818183,
      "grad_norm": 3.4695231914520264,
      "learning_rate": 1.973905723905724e-05,
      "loss": 2.284,
      "step": 2160
    },
    {
      "epoch": 1.8265993265993266,
      "grad_norm": 3.2327258586883545,
      "learning_rate": 1.9598765432098768e-05,
      "loss": 2.3084,
      "step": 2170
    },
    {
      "epoch": 1.835016835016835,
      "grad_norm": 2.878326177597046,
      "learning_rate": 1.9458473625140293e-05,
      "loss": 2.5618,
      "step": 2180
    },
    {
      "epoch": 1.8434343434343434,
      "grad_norm": 3.4700164794921875,
      "learning_rate": 1.9318181818181818e-05,
      "loss": 2.3738,
      "step": 2190
    },
    {
      "epoch": 1.8518518518518519,
      "grad_norm": 3.615539789199829,
      "learning_rate": 1.9177890011223347e-05,
      "loss": 2.2962,
      "step": 2200
    },
    {
      "epoch": 1.8602693602693603,
      "grad_norm": 3.0230872631073,
      "learning_rate": 1.9037598204264872e-05,
      "loss": 2.3229,
      "step": 2210
    },
    {
      "epoch": 1.8686868686868687,
      "grad_norm": 3.3907663822174072,
      "learning_rate": 1.8897306397306397e-05,
      "loss": 2.4199,
      "step": 2220
    },
    {
      "epoch": 1.8771043771043772,
      "grad_norm": 3.1739697456359863,
      "learning_rate": 1.8757014590347925e-05,
      "loss": 2.3878,
      "step": 2230
    },
    {
      "epoch": 1.8855218855218854,
      "grad_norm": 3.4084649085998535,
      "learning_rate": 1.861672278338945e-05,
      "loss": 2.3386,
      "step": 2240
    },
    {
      "epoch": 1.893939393939394,
      "grad_norm": 2.9609739780426025,
      "learning_rate": 1.847643097643098e-05,
      "loss": 2.3211,
      "step": 2250
    },
    {
      "epoch": 1.9023569023569022,
      "grad_norm": 3.075378894805908,
      "learning_rate": 1.8336139169472504e-05,
      "loss": 2.3675,
      "step": 2260
    },
    {
      "epoch": 1.910774410774411,
      "grad_norm": 2.9113922119140625,
      "learning_rate": 1.819584736251403e-05,
      "loss": 2.3151,
      "step": 2270
    },
    {
      "epoch": 1.9191919191919191,
      "grad_norm": 3.364899158477783,
      "learning_rate": 1.8055555555555555e-05,
      "loss": 2.1976,
      "step": 2280
    },
    {
      "epoch": 1.9276094276094278,
      "grad_norm": 4.041044235229492,
      "learning_rate": 1.7915263748597083e-05,
      "loss": 2.366,
      "step": 2290
    },
    {
      "epoch": 1.936026936026936,
      "grad_norm": 2.874314785003662,
      "learning_rate": 1.777497194163861e-05,
      "loss": 2.2946,
      "step": 2300
    },
    {
      "epoch": 1.9444444444444444,
      "grad_norm": 3.636214256286621,
      "learning_rate": 1.7634680134680134e-05,
      "loss": 2.2318,
      "step": 2310
    },
    {
      "epoch": 1.9528619528619529,
      "grad_norm": 3.3151936531066895,
      "learning_rate": 1.7494388327721662e-05,
      "loss": 2.484,
      "step": 2320
    },
    {
      "epoch": 1.9612794612794613,
      "grad_norm": 2.95894193649292,
      "learning_rate": 1.7354096520763187e-05,
      "loss": 2.2886,
      "step": 2330
    },
    {
      "epoch": 1.9696969696969697,
      "grad_norm": 3.3470945358276367,
      "learning_rate": 1.7213804713804716e-05,
      "loss": 2.331,
      "step": 2340
    },
    {
      "epoch": 1.9781144781144782,
      "grad_norm": 2.9090137481689453,
      "learning_rate": 1.707351290684624e-05,
      "loss": 2.3689,
      "step": 2350
    },
    {
      "epoch": 1.9865319865319866,
      "grad_norm": 2.894779682159424,
      "learning_rate": 1.6933221099887766e-05,
      "loss": 2.4426,
      "step": 2360
    },
    {
      "epoch": 1.9949494949494948,
      "grad_norm": 3.315685749053955,
      "learning_rate": 1.6792929292929295e-05,
      "loss": 2.3809,
      "step": 2370
    }
  ],
  "logging_steps": 10,
  "max_steps": 3564,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 5099434873454592.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}