|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 0, |
|
"global_step": 1479, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.002028397565922921, |
|
"grad_norm": 0.56640625, |
|
"learning_rate": 9.993238674780258e-06, |
|
"loss": 2.0083, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.004056795131845842, |
|
"grad_norm": 0.515625, |
|
"learning_rate": 9.986477349560515e-06, |
|
"loss": 1.8898, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.006085192697768763, |
|
"grad_norm": 0.50390625, |
|
"learning_rate": 9.979716024340772e-06, |
|
"loss": 1.9586, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.008113590263691683, |
|
"grad_norm": 0.46484375, |
|
"learning_rate": 9.972954699121028e-06, |
|
"loss": 1.8423, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.010141987829614604, |
|
"grad_norm": 0.4609375, |
|
"learning_rate": 9.966193373901285e-06, |
|
"loss": 1.9374, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.012170385395537525, |
|
"grad_norm": 0.42578125, |
|
"learning_rate": 9.959432048681542e-06, |
|
"loss": 1.856, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.014198782961460446, |
|
"grad_norm": 0.400390625, |
|
"learning_rate": 9.9526707234618e-06, |
|
"loss": 1.827, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.016227180527383367, |
|
"grad_norm": 0.40625, |
|
"learning_rate": 9.945909398242056e-06, |
|
"loss": 1.9399, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.018255578093306288, |
|
"grad_norm": 0.396484375, |
|
"learning_rate": 9.939148073022313e-06, |
|
"loss": 1.8973, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.02028397565922921, |
|
"grad_norm": 0.349609375, |
|
"learning_rate": 9.93238674780257e-06, |
|
"loss": 1.7955, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02231237322515213, |
|
"grad_norm": 0.34765625, |
|
"learning_rate": 9.925625422582827e-06, |
|
"loss": 1.7947, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.02434077079107505, |
|
"grad_norm": 0.33203125, |
|
"learning_rate": 9.918864097363084e-06, |
|
"loss": 1.7894, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.02636916835699797, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 9.91210277214334e-06, |
|
"loss": 1.7834, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.028397565922920892, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 9.905341446923598e-06, |
|
"loss": 1.6634, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.030425963488843813, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 9.898580121703854e-06, |
|
"loss": 1.7655, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.032454361054766734, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 9.891818796484111e-06, |
|
"loss": 1.6973, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.034482758620689655, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 9.885057471264368e-06, |
|
"loss": 1.7866, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.036511156186612576, |
|
"grad_norm": 0.283203125, |
|
"learning_rate": 9.878296146044625e-06, |
|
"loss": 1.6943, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.038539553752535496, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 9.871534820824882e-06, |
|
"loss": 1.7, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.04056795131845842, |
|
"grad_norm": 0.37109375, |
|
"learning_rate": 9.864773495605139e-06, |
|
"loss": 1.6642, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.04259634888438134, |
|
"grad_norm": 0.28125, |
|
"learning_rate": 9.858012170385396e-06, |
|
"loss": 1.6852, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.04462474645030426, |
|
"grad_norm": 0.291015625, |
|
"learning_rate": 9.851250845165653e-06, |
|
"loss": 1.6855, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.04665314401622718, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 9.84448951994591e-06, |
|
"loss": 1.6641, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.0486815415821501, |
|
"grad_norm": 0.259765625, |
|
"learning_rate": 9.837728194726167e-06, |
|
"loss": 1.6916, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.05070993914807302, |
|
"grad_norm": 0.275390625, |
|
"learning_rate": 9.830966869506424e-06, |
|
"loss": 1.7211, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.05273833671399594, |
|
"grad_norm": 0.240234375, |
|
"learning_rate": 9.82420554428668e-06, |
|
"loss": 1.5421, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.05476673427991886, |
|
"grad_norm": 0.236328125, |
|
"learning_rate": 9.817444219066939e-06, |
|
"loss": 1.5551, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.056795131845841784, |
|
"grad_norm": 0.2265625, |
|
"learning_rate": 9.810682893847194e-06, |
|
"loss": 1.5284, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.058823529411764705, |
|
"grad_norm": 0.263671875, |
|
"learning_rate": 9.803921568627451e-06, |
|
"loss": 1.5221, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.060851926977687626, |
|
"grad_norm": 0.240234375, |
|
"learning_rate": 9.797160243407708e-06, |
|
"loss": 1.6265, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.06288032454361055, |
|
"grad_norm": 0.224609375, |
|
"learning_rate": 9.790398918187965e-06, |
|
"loss": 1.582, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.06490872210953347, |
|
"grad_norm": 0.212890625, |
|
"learning_rate": 9.783637592968222e-06, |
|
"loss": 1.5373, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.06693711967545639, |
|
"grad_norm": 0.2109375, |
|
"learning_rate": 9.776876267748479e-06, |
|
"loss": 1.4842, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.06896551724137931, |
|
"grad_norm": 0.203125, |
|
"learning_rate": 9.770114942528738e-06, |
|
"loss": 1.4742, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.07099391480730223, |
|
"grad_norm": 0.2158203125, |
|
"learning_rate": 9.763353617308994e-06, |
|
"loss": 1.5238, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.07302231237322515, |
|
"grad_norm": 0.21484375, |
|
"learning_rate": 9.75659229208925e-06, |
|
"loss": 1.4861, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.07505070993914807, |
|
"grad_norm": 0.369140625, |
|
"learning_rate": 9.749830966869507e-06, |
|
"loss": 1.475, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.07707910750507099, |
|
"grad_norm": 0.2021484375, |
|
"learning_rate": 9.743069641649763e-06, |
|
"loss": 1.492, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.07910750507099391, |
|
"grad_norm": 0.2890625, |
|
"learning_rate": 9.73630831643002e-06, |
|
"loss": 1.4861, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.08113590263691683, |
|
"grad_norm": 0.19921875, |
|
"learning_rate": 9.729546991210277e-06, |
|
"loss": 1.4854, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.08316430020283976, |
|
"grad_norm": 0.201171875, |
|
"learning_rate": 9.722785665990536e-06, |
|
"loss": 1.4827, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.08519269776876268, |
|
"grad_norm": 0.1982421875, |
|
"learning_rate": 9.716024340770793e-06, |
|
"loss": 1.5104, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.0872210953346856, |
|
"grad_norm": 0.1923828125, |
|
"learning_rate": 9.70926301555105e-06, |
|
"loss": 1.4619, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.08924949290060852, |
|
"grad_norm": 0.1826171875, |
|
"learning_rate": 9.702501690331305e-06, |
|
"loss": 1.4507, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.09127789046653144, |
|
"grad_norm": 0.1875, |
|
"learning_rate": 9.695740365111562e-06, |
|
"loss": 1.4821, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.09330628803245436, |
|
"grad_norm": 0.1884765625, |
|
"learning_rate": 9.688979039891819e-06, |
|
"loss": 1.4255, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.09533468559837728, |
|
"grad_norm": 0.1806640625, |
|
"learning_rate": 9.682217714672076e-06, |
|
"loss": 1.3824, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.0973630831643002, |
|
"grad_norm": 0.19140625, |
|
"learning_rate": 9.675456389452334e-06, |
|
"loss": 1.3772, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.09939148073022312, |
|
"grad_norm": 0.1953125, |
|
"learning_rate": 9.668695064232591e-06, |
|
"loss": 1.4889, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.10141987829614604, |
|
"grad_norm": 0.1796875, |
|
"learning_rate": 9.661933739012848e-06, |
|
"loss": 1.4423, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.10344827586206896, |
|
"grad_norm": 0.1728515625, |
|
"learning_rate": 9.655172413793105e-06, |
|
"loss": 1.415, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.10547667342799188, |
|
"grad_norm": 0.169921875, |
|
"learning_rate": 9.64841108857336e-06, |
|
"loss": 1.3956, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.1075050709939148, |
|
"grad_norm": 0.197265625, |
|
"learning_rate": 9.641649763353617e-06, |
|
"loss": 1.469, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.10953346855983773, |
|
"grad_norm": 0.171875, |
|
"learning_rate": 9.634888438133874e-06, |
|
"loss": 1.4105, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.11156186612576065, |
|
"grad_norm": 0.1767578125, |
|
"learning_rate": 9.628127112914133e-06, |
|
"loss": 1.4032, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.11359026369168357, |
|
"grad_norm": 0.1650390625, |
|
"learning_rate": 9.62136578769439e-06, |
|
"loss": 1.2954, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.11561866125760649, |
|
"grad_norm": 0.162109375, |
|
"learning_rate": 9.614604462474646e-06, |
|
"loss": 1.3745, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.11764705882352941, |
|
"grad_norm": 1.578125, |
|
"learning_rate": 9.607843137254903e-06, |
|
"loss": 1.3565, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.11967545638945233, |
|
"grad_norm": 0.1640625, |
|
"learning_rate": 9.60108181203516e-06, |
|
"loss": 1.3677, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.12170385395537525, |
|
"grad_norm": 0.17578125, |
|
"learning_rate": 9.594320486815416e-06, |
|
"loss": 1.3665, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.12373225152129817, |
|
"grad_norm": 0.197265625, |
|
"learning_rate": 9.587559161595672e-06, |
|
"loss": 1.3636, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.1257606490872211, |
|
"grad_norm": 0.1640625, |
|
"learning_rate": 9.580797836375931e-06, |
|
"loss": 1.3645, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.12778904665314403, |
|
"grad_norm": 0.1630859375, |
|
"learning_rate": 9.574036511156188e-06, |
|
"loss": 1.3666, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.12981744421906694, |
|
"grad_norm": 0.1533203125, |
|
"learning_rate": 9.567275185936445e-06, |
|
"loss": 1.3332, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.13184584178498987, |
|
"grad_norm": 0.1611328125, |
|
"learning_rate": 9.560513860716702e-06, |
|
"loss": 1.373, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.13387423935091278, |
|
"grad_norm": 0.1748046875, |
|
"learning_rate": 9.553752535496959e-06, |
|
"loss": 1.3931, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.1359026369168357, |
|
"grad_norm": 0.1787109375, |
|
"learning_rate": 9.546991210277216e-06, |
|
"loss": 1.2847, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.13793103448275862, |
|
"grad_norm": 0.16796875, |
|
"learning_rate": 9.54022988505747e-06, |
|
"loss": 1.3494, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.13995943204868155, |
|
"grad_norm": 0.1591796875, |
|
"learning_rate": 9.53346855983773e-06, |
|
"loss": 1.3461, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.14198782961460446, |
|
"grad_norm": 0.2109375, |
|
"learning_rate": 9.526707234617986e-06, |
|
"loss": 1.3208, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.1440162271805274, |
|
"grad_norm": 0.259765625, |
|
"learning_rate": 9.519945909398243e-06, |
|
"loss": 1.3241, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.1460446247464503, |
|
"grad_norm": 0.1591796875, |
|
"learning_rate": 9.5131845841785e-06, |
|
"loss": 1.3235, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.14807302231237324, |
|
"grad_norm": 0.1923828125, |
|
"learning_rate": 9.506423258958757e-06, |
|
"loss": 1.3221, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.15010141987829614, |
|
"grad_norm": 0.197265625, |
|
"learning_rate": 9.499661933739014e-06, |
|
"loss": 1.3067, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.15212981744421908, |
|
"grad_norm": 0.1669921875, |
|
"learning_rate": 9.492900608519271e-06, |
|
"loss": 1.3077, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.15415821501014199, |
|
"grad_norm": 0.220703125, |
|
"learning_rate": 9.486139283299526e-06, |
|
"loss": 1.3585, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.15618661257606492, |
|
"grad_norm": 0.1650390625, |
|
"learning_rate": 9.479377958079785e-06, |
|
"loss": 1.3229, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.15821501014198783, |
|
"grad_norm": 0.173828125, |
|
"learning_rate": 9.472616632860042e-06, |
|
"loss": 1.3157, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.16024340770791076, |
|
"grad_norm": 0.1904296875, |
|
"learning_rate": 9.465855307640299e-06, |
|
"loss": 1.2941, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.16227180527383367, |
|
"grad_norm": 0.1591796875, |
|
"learning_rate": 9.459093982420555e-06, |
|
"loss": 1.3267, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.1643002028397566, |
|
"grad_norm": 0.158203125, |
|
"learning_rate": 9.452332657200812e-06, |
|
"loss": 1.3108, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.1663286004056795, |
|
"grad_norm": 0.1630859375, |
|
"learning_rate": 9.44557133198107e-06, |
|
"loss": 1.2806, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.16835699797160245, |
|
"grad_norm": 0.15625, |
|
"learning_rate": 9.438810006761326e-06, |
|
"loss": 1.2928, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.17038539553752535, |
|
"grad_norm": 0.169921875, |
|
"learning_rate": 9.432048681541583e-06, |
|
"loss": 1.3039, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.1724137931034483, |
|
"grad_norm": 0.1826171875, |
|
"learning_rate": 9.42528735632184e-06, |
|
"loss": 1.3077, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.1744421906693712, |
|
"grad_norm": 0.1513671875, |
|
"learning_rate": 9.418526031102097e-06, |
|
"loss": 1.3454, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.17647058823529413, |
|
"grad_norm": 0.1513671875, |
|
"learning_rate": 9.411764705882354e-06, |
|
"loss": 1.3095, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.17849898580121704, |
|
"grad_norm": 0.166015625, |
|
"learning_rate": 9.40500338066261e-06, |
|
"loss": 1.3264, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.18052738336713997, |
|
"grad_norm": 0.1591796875, |
|
"learning_rate": 9.398242055442868e-06, |
|
"loss": 1.3138, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.18255578093306288, |
|
"grad_norm": 0.1572265625, |
|
"learning_rate": 9.391480730223125e-06, |
|
"loss": 1.2476, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.1845841784989858, |
|
"grad_norm": 0.1669921875, |
|
"learning_rate": 9.384719405003381e-06, |
|
"loss": 1.2699, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.18661257606490872, |
|
"grad_norm": 0.1640625, |
|
"learning_rate": 9.377958079783638e-06, |
|
"loss": 1.3391, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.18864097363083165, |
|
"grad_norm": 0.1591796875, |
|
"learning_rate": 9.371196754563895e-06, |
|
"loss": 1.3236, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.19066937119675456, |
|
"grad_norm": 0.39453125, |
|
"learning_rate": 9.364435429344152e-06, |
|
"loss": 1.3209, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.1926977687626775, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 9.357674104124409e-06, |
|
"loss": 1.3001, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.1947261663286004, |
|
"grad_norm": 0.16796875, |
|
"learning_rate": 9.350912778904666e-06, |
|
"loss": 1.2758, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.19675456389452334, |
|
"grad_norm": 0.166015625, |
|
"learning_rate": 9.344151453684923e-06, |
|
"loss": 1.2668, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.19878296146044624, |
|
"grad_norm": 0.1748046875, |
|
"learning_rate": 9.33739012846518e-06, |
|
"loss": 1.2948, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.20081135902636918, |
|
"grad_norm": 0.2099609375, |
|
"learning_rate": 9.330628803245437e-06, |
|
"loss": 1.3023, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.2028397565922921, |
|
"grad_norm": 0.1650390625, |
|
"learning_rate": 9.323867478025694e-06, |
|
"loss": 1.2459, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.20486815415821502, |
|
"grad_norm": 0.1669921875, |
|
"learning_rate": 9.31710615280595e-06, |
|
"loss": 1.2824, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.20689655172413793, |
|
"grad_norm": 0.1845703125, |
|
"learning_rate": 9.310344827586207e-06, |
|
"loss": 1.2607, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.20892494929006086, |
|
"grad_norm": 0.1708984375, |
|
"learning_rate": 9.303583502366464e-06, |
|
"loss": 1.2807, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.21095334685598377, |
|
"grad_norm": 0.283203125, |
|
"learning_rate": 9.296822177146721e-06, |
|
"loss": 1.2656, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.2129817444219067, |
|
"grad_norm": 0.236328125, |
|
"learning_rate": 9.290060851926978e-06, |
|
"loss": 1.2689, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.2150101419878296, |
|
"grad_norm": 0.197265625, |
|
"learning_rate": 9.283299526707235e-06, |
|
"loss": 1.2439, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.21703853955375255, |
|
"grad_norm": 0.24609375, |
|
"learning_rate": 9.276538201487492e-06, |
|
"loss": 1.2258, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.21906693711967545, |
|
"grad_norm": 0.16796875, |
|
"learning_rate": 9.269776876267749e-06, |
|
"loss": 1.2496, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.2210953346855984, |
|
"grad_norm": 0.162109375, |
|
"learning_rate": 9.263015551048006e-06, |
|
"loss": 1.2327, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.2231237322515213, |
|
"grad_norm": 0.1748046875, |
|
"learning_rate": 9.256254225828263e-06, |
|
"loss": 1.2223, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.22515212981744423, |
|
"grad_norm": 0.1943359375, |
|
"learning_rate": 9.24949290060852e-06, |
|
"loss": 1.2318, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.22718052738336714, |
|
"grad_norm": 0.1796875, |
|
"learning_rate": 9.242731575388777e-06, |
|
"loss": 1.2651, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.22920892494929007, |
|
"grad_norm": 0.2470703125, |
|
"learning_rate": 9.235970250169034e-06, |
|
"loss": 1.1937, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.23123732251521298, |
|
"grad_norm": 0.1845703125, |
|
"learning_rate": 9.22920892494929e-06, |
|
"loss": 1.239, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.2332657200811359, |
|
"grad_norm": 0.1796875, |
|
"learning_rate": 9.222447599729547e-06, |
|
"loss": 1.2483, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.23529411764705882, |
|
"grad_norm": 0.1787109375, |
|
"learning_rate": 9.215686274509804e-06, |
|
"loss": 1.2162, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.23732251521298176, |
|
"grad_norm": 0.2080078125, |
|
"learning_rate": 9.208924949290061e-06, |
|
"loss": 1.2844, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.23935091277890466, |
|
"grad_norm": 0.1728515625, |
|
"learning_rate": 9.202163624070318e-06, |
|
"loss": 1.2785, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.2413793103448276, |
|
"grad_norm": 0.1728515625, |
|
"learning_rate": 9.195402298850575e-06, |
|
"loss": 1.284, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.2434077079107505, |
|
"grad_norm": 0.15625, |
|
"learning_rate": 9.188640973630832e-06, |
|
"loss": 1.2332, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.24543610547667344, |
|
"grad_norm": 0.1611328125, |
|
"learning_rate": 9.181879648411089e-06, |
|
"loss": 1.2216, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.24746450304259635, |
|
"grad_norm": 0.1572265625, |
|
"learning_rate": 9.175118323191346e-06, |
|
"loss": 1.2629, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.24949290060851928, |
|
"grad_norm": 0.162109375, |
|
"learning_rate": 9.168356997971604e-06, |
|
"loss": 1.2498, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.2515212981744422, |
|
"grad_norm": 0.2041015625, |
|
"learning_rate": 9.16159567275186e-06, |
|
"loss": 1.249, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.2535496957403651, |
|
"grad_norm": 0.23046875, |
|
"learning_rate": 9.154834347532116e-06, |
|
"loss": 1.2158, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.25557809330628806, |
|
"grad_norm": 0.1708984375, |
|
"learning_rate": 9.148073022312373e-06, |
|
"loss": 1.227, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.25760649087221094, |
|
"grad_norm": 0.240234375, |
|
"learning_rate": 9.14131169709263e-06, |
|
"loss": 1.2769, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.25963488843813387, |
|
"grad_norm": 0.201171875, |
|
"learning_rate": 9.134550371872887e-06, |
|
"loss": 1.2222, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.2616632860040568, |
|
"grad_norm": 0.234375, |
|
"learning_rate": 9.127789046653144e-06, |
|
"loss": 1.2199, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.26369168356997974, |
|
"grad_norm": 0.1748046875, |
|
"learning_rate": 9.121027721433403e-06, |
|
"loss": 1.2321, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.2657200811359026, |
|
"grad_norm": 0.23046875, |
|
"learning_rate": 9.11426639621366e-06, |
|
"loss": 1.2217, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.26774847870182555, |
|
"grad_norm": 0.181640625, |
|
"learning_rate": 9.107505070993915e-06, |
|
"loss": 1.2269, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.2697768762677485, |
|
"grad_norm": 0.1708984375, |
|
"learning_rate": 9.100743745774172e-06, |
|
"loss": 1.2449, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.2718052738336714, |
|
"grad_norm": 0.1728515625, |
|
"learning_rate": 9.093982420554429e-06, |
|
"loss": 1.2616, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.2738336713995943, |
|
"grad_norm": 0.189453125, |
|
"learning_rate": 9.087221095334686e-06, |
|
"loss": 1.2719, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.27586206896551724, |
|
"grad_norm": 0.1953125, |
|
"learning_rate": 9.080459770114942e-06, |
|
"loss": 1.217, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.2778904665314402, |
|
"grad_norm": 0.1708984375, |
|
"learning_rate": 9.073698444895201e-06, |
|
"loss": 1.21, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.2799188640973631, |
|
"grad_norm": 0.1806640625, |
|
"learning_rate": 9.066937119675458e-06, |
|
"loss": 1.1984, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.281947261663286, |
|
"grad_norm": 0.189453125, |
|
"learning_rate": 9.060175794455715e-06, |
|
"loss": 1.2262, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.2839756592292089, |
|
"grad_norm": 0.1708984375, |
|
"learning_rate": 9.05341446923597e-06, |
|
"loss": 1.2322, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.28600405679513186, |
|
"grad_norm": 0.2451171875, |
|
"learning_rate": 9.046653144016227e-06, |
|
"loss": 1.2377, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.2880324543610548, |
|
"grad_norm": 0.169921875, |
|
"learning_rate": 9.039891818796484e-06, |
|
"loss": 1.2473, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.29006085192697767, |
|
"grad_norm": 0.177734375, |
|
"learning_rate": 9.033130493576741e-06, |
|
"loss": 1.2727, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.2920892494929006, |
|
"grad_norm": 0.33203125, |
|
"learning_rate": 9.026369168357e-06, |
|
"loss": 1.1902, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.29411764705882354, |
|
"grad_norm": 0.185546875, |
|
"learning_rate": 9.019607843137256e-06, |
|
"loss": 1.2436, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.2961460446247465, |
|
"grad_norm": 0.185546875, |
|
"learning_rate": 9.012846517917513e-06, |
|
"loss": 1.1909, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.29817444219066935, |
|
"grad_norm": 0.17578125, |
|
"learning_rate": 9.00608519269777e-06, |
|
"loss": 1.2215, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.3002028397565923, |
|
"grad_norm": 0.2001953125, |
|
"learning_rate": 8.999323867478025e-06, |
|
"loss": 1.2217, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.3022312373225152, |
|
"grad_norm": 0.2138671875, |
|
"learning_rate": 8.992562542258282e-06, |
|
"loss": 1.2267, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.30425963488843816, |
|
"grad_norm": 0.16796875, |
|
"learning_rate": 8.98580121703854e-06, |
|
"loss": 1.2343, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.30628803245436104, |
|
"grad_norm": 0.197265625, |
|
"learning_rate": 8.979039891818798e-06, |
|
"loss": 1.2193, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.30831643002028397, |
|
"grad_norm": 0.1806640625, |
|
"learning_rate": 8.972278566599055e-06, |
|
"loss": 1.2008, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.3103448275862069, |
|
"grad_norm": 0.171875, |
|
"learning_rate": 8.965517241379312e-06, |
|
"loss": 1.217, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.31237322515212984, |
|
"grad_norm": 0.2158203125, |
|
"learning_rate": 8.958755916159569e-06, |
|
"loss": 1.2081, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.3144016227180527, |
|
"grad_norm": 0.201171875, |
|
"learning_rate": 8.951994590939825e-06, |
|
"loss": 1.1466, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.31643002028397565, |
|
"grad_norm": 0.173828125, |
|
"learning_rate": 8.94523326572008e-06, |
|
"loss": 1.1587, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.3184584178498986, |
|
"grad_norm": 0.1728515625, |
|
"learning_rate": 8.938471940500338e-06, |
|
"loss": 1.1839, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.3204868154158215, |
|
"grad_norm": 0.177734375, |
|
"learning_rate": 8.931710615280596e-06, |
|
"loss": 1.208, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.3225152129817444, |
|
"grad_norm": 0.1728515625, |
|
"learning_rate": 8.924949290060853e-06, |
|
"loss": 1.1899, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.32454361054766734, |
|
"grad_norm": 0.181640625, |
|
"learning_rate": 8.91818796484111e-06, |
|
"loss": 1.2019, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.3265720081135903, |
|
"grad_norm": 0.1875, |
|
"learning_rate": 8.911426639621367e-06, |
|
"loss": 1.1336, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.3286004056795132, |
|
"grad_norm": 0.177734375, |
|
"learning_rate": 8.904665314401624e-06, |
|
"loss": 1.1841, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.3306288032454361, |
|
"grad_norm": 0.1953125, |
|
"learning_rate": 8.89790398918188e-06, |
|
"loss": 1.2169, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.332657200811359, |
|
"grad_norm": 0.2041015625, |
|
"learning_rate": 8.891142663962136e-06, |
|
"loss": 1.1857, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.33468559837728196, |
|
"grad_norm": 0.1865234375, |
|
"learning_rate": 8.884381338742395e-06, |
|
"loss": 1.1672, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.3367139959432049, |
|
"grad_norm": 0.1884765625, |
|
"learning_rate": 8.877620013522652e-06, |
|
"loss": 1.2451, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.33874239350912777, |
|
"grad_norm": 0.1953125, |
|
"learning_rate": 8.870858688302908e-06, |
|
"loss": 1.2504, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.3407707910750507, |
|
"grad_norm": 0.23046875, |
|
"learning_rate": 8.864097363083165e-06, |
|
"loss": 1.194, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.34279918864097364, |
|
"grad_norm": 0.2158203125, |
|
"learning_rate": 8.857336037863422e-06, |
|
"loss": 1.1883, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.3448275862068966, |
|
"grad_norm": 0.1982421875, |
|
"learning_rate": 8.85057471264368e-06, |
|
"loss": 1.205, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.34685598377281945, |
|
"grad_norm": 0.193359375, |
|
"learning_rate": 8.843813387423936e-06, |
|
"loss": 1.2313, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.3488843813387424, |
|
"grad_norm": 0.22265625, |
|
"learning_rate": 8.837052062204193e-06, |
|
"loss": 1.163, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.3509127789046653, |
|
"grad_norm": 0.2109375, |
|
"learning_rate": 8.83029073698445e-06, |
|
"loss": 1.1458, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.35294117647058826, |
|
"grad_norm": 0.185546875, |
|
"learning_rate": 8.823529411764707e-06, |
|
"loss": 1.1783, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.35496957403651114, |
|
"grad_norm": 0.181640625, |
|
"learning_rate": 8.816768086544964e-06, |
|
"loss": 1.1763, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.35699797160243407, |
|
"grad_norm": 0.171875, |
|
"learning_rate": 8.81000676132522e-06, |
|
"loss": 1.1719, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.359026369168357, |
|
"grad_norm": 0.1865234375, |
|
"learning_rate": 8.803245436105478e-06, |
|
"loss": 1.1703, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.36105476673427994, |
|
"grad_norm": 0.1982421875, |
|
"learning_rate": 8.796484110885734e-06, |
|
"loss": 1.213, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.3630831643002028, |
|
"grad_norm": 0.20703125, |
|
"learning_rate": 8.789722785665991e-06, |
|
"loss": 1.1656, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.36511156186612576, |
|
"grad_norm": 0.17578125, |
|
"learning_rate": 8.782961460446248e-06, |
|
"loss": 1.1381, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.3671399594320487, |
|
"grad_norm": 0.1904296875, |
|
"learning_rate": 8.776200135226505e-06, |
|
"loss": 1.213, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.3691683569979716, |
|
"grad_norm": 0.1845703125, |
|
"learning_rate": 8.769438810006762e-06, |
|
"loss": 1.1966, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.3711967545638945, |
|
"grad_norm": 0.259765625, |
|
"learning_rate": 8.762677484787019e-06, |
|
"loss": 1.2488, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.37322515212981744, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 8.755916159567276e-06, |
|
"loss": 1.1837, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.3752535496957404, |
|
"grad_norm": 0.1865234375, |
|
"learning_rate": 8.749154834347533e-06, |
|
"loss": 1.1915, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.3772819472616633, |
|
"grad_norm": 0.19140625, |
|
"learning_rate": 8.74239350912779e-06, |
|
"loss": 1.187, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.3793103448275862, |
|
"grad_norm": 0.18359375, |
|
"learning_rate": 8.735632183908047e-06, |
|
"loss": 1.189, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.3813387423935091, |
|
"grad_norm": 0.1787109375, |
|
"learning_rate": 8.728870858688304e-06, |
|
"loss": 1.1898, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.38336713995943206, |
|
"grad_norm": 0.1982421875, |
|
"learning_rate": 8.72210953346856e-06, |
|
"loss": 1.1981, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.385395537525355, |
|
"grad_norm": 0.1865234375, |
|
"learning_rate": 8.715348208248817e-06, |
|
"loss": 1.1717, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.38742393509127787, |
|
"grad_norm": 0.1943359375, |
|
"learning_rate": 8.708586883029074e-06, |
|
"loss": 1.1787, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.3894523326572008, |
|
"grad_norm": 0.1943359375, |
|
"learning_rate": 8.701825557809331e-06, |
|
"loss": 1.1728, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.39148073022312374, |
|
"grad_norm": 0.1845703125, |
|
"learning_rate": 8.695064232589588e-06, |
|
"loss": 1.1653, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.3935091277890467, |
|
"grad_norm": 0.2734375, |
|
"learning_rate": 8.688302907369845e-06, |
|
"loss": 1.117, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.39553752535496955, |
|
"grad_norm": 0.1806640625, |
|
"learning_rate": 8.681541582150102e-06, |
|
"loss": 1.1384, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.3975659229208925, |
|
"grad_norm": 0.302734375, |
|
"learning_rate": 8.674780256930359e-06, |
|
"loss": 1.1527, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.3995943204868154, |
|
"grad_norm": 0.1953125, |
|
"learning_rate": 8.668018931710616e-06, |
|
"loss": 1.1487, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.40162271805273836, |
|
"grad_norm": 0.1982421875, |
|
"learning_rate": 8.661257606490873e-06, |
|
"loss": 1.2106, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.40365111561866124, |
|
"grad_norm": 0.197265625, |
|
"learning_rate": 8.65449628127113e-06, |
|
"loss": 1.1738, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.4056795131845842, |
|
"grad_norm": 1.890625, |
|
"learning_rate": 8.647734956051387e-06, |
|
"loss": 1.1729, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.4077079107505071, |
|
"grad_norm": 0.2099609375, |
|
"learning_rate": 8.640973630831643e-06, |
|
"loss": 1.1877, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.40973630831643004, |
|
"grad_norm": 0.2138671875, |
|
"learning_rate": 8.6342123056119e-06, |
|
"loss": 1.1108, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.4117647058823529, |
|
"grad_norm": 0.185546875, |
|
"learning_rate": 8.627450980392157e-06, |
|
"loss": 1.1975, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.41379310344827586, |
|
"grad_norm": 0.2021484375, |
|
"learning_rate": 8.620689655172414e-06, |
|
"loss": 1.1852, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.4158215010141988, |
|
"grad_norm": 0.19140625, |
|
"learning_rate": 8.613928329952671e-06, |
|
"loss": 1.1958, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.4178498985801217, |
|
"grad_norm": 0.265625, |
|
"learning_rate": 8.607167004732928e-06, |
|
"loss": 1.1645, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.4198782961460446, |
|
"grad_norm": 0.1943359375, |
|
"learning_rate": 8.600405679513185e-06, |
|
"loss": 1.126, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.42190669371196754, |
|
"grad_norm": 0.1796875, |
|
"learning_rate": 8.593644354293442e-06, |
|
"loss": 1.159, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.4239350912778905, |
|
"grad_norm": 0.181640625, |
|
"learning_rate": 8.586883029073699e-06, |
|
"loss": 1.1864, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.4259634888438134, |
|
"grad_norm": 0.224609375, |
|
"learning_rate": 8.580121703853956e-06, |
|
"loss": 1.1173, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.4279918864097363, |
|
"grad_norm": 0.1875, |
|
"learning_rate": 8.573360378634214e-06, |
|
"loss": 1.1146, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.4300202839756592, |
|
"grad_norm": 0.2275390625, |
|
"learning_rate": 8.56659905341447e-06, |
|
"loss": 1.2435, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.43204868154158216, |
|
"grad_norm": 0.2021484375, |
|
"learning_rate": 8.559837728194726e-06, |
|
"loss": 1.1341, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.4340770791075051, |
|
"grad_norm": 0.19140625, |
|
"learning_rate": 8.553076402974983e-06, |
|
"loss": 1.1846, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.43610547667342797, |
|
"grad_norm": 0.2138671875, |
|
"learning_rate": 8.54631507775524e-06, |
|
"loss": 1.156, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.4381338742393509, |
|
"grad_norm": 0.203125, |
|
"learning_rate": 8.539553752535497e-06, |
|
"loss": 1.1899, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.44016227180527384, |
|
"grad_norm": 0.17578125, |
|
"learning_rate": 8.532792427315754e-06, |
|
"loss": 1.1491, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.4421906693711968, |
|
"grad_norm": 0.185546875, |
|
"learning_rate": 8.526031102096013e-06, |
|
"loss": 1.116, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.44421906693711966, |
|
"grad_norm": 0.2041015625, |
|
"learning_rate": 8.51926977687627e-06, |
|
"loss": 1.1333, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.4462474645030426, |
|
"grad_norm": 0.2255859375, |
|
"learning_rate": 8.512508451656525e-06, |
|
"loss": 1.1806, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.4482758620689655, |
|
"grad_norm": 0.357421875, |
|
"learning_rate": 8.505747126436782e-06, |
|
"loss": 1.1594, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.45030425963488846, |
|
"grad_norm": 0.1982421875, |
|
"learning_rate": 8.498985801217039e-06, |
|
"loss": 1.1281, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.45233265720081134, |
|
"grad_norm": 0.1845703125, |
|
"learning_rate": 8.492224475997295e-06, |
|
"loss": 1.1715, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.4543610547667343, |
|
"grad_norm": 0.193359375, |
|
"learning_rate": 8.485463150777552e-06, |
|
"loss": 1.179, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.4563894523326572, |
|
"grad_norm": 0.18359375, |
|
"learning_rate": 8.478701825557811e-06, |
|
"loss": 1.1349, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.45841784989858014, |
|
"grad_norm": 0.201171875, |
|
"learning_rate": 8.471940500338068e-06, |
|
"loss": 1.165, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.460446247464503, |
|
"grad_norm": 0.1875, |
|
"learning_rate": 8.465179175118325e-06, |
|
"loss": 1.1169, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.46247464503042596, |
|
"grad_norm": 0.2021484375, |
|
"learning_rate": 8.45841784989858e-06, |
|
"loss": 1.17, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.4645030425963489, |
|
"grad_norm": 0.212890625, |
|
"learning_rate": 8.451656524678837e-06, |
|
"loss": 1.0952, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.4665314401622718, |
|
"grad_norm": 0.193359375, |
|
"learning_rate": 8.444895199459094e-06, |
|
"loss": 1.1216, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.4685598377281947, |
|
"grad_norm": 0.203125, |
|
"learning_rate": 8.43813387423935e-06, |
|
"loss": 1.1362, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.47058823529411764, |
|
"grad_norm": 0.203125, |
|
"learning_rate": 8.43137254901961e-06, |
|
"loss": 1.1633, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.4726166328600406, |
|
"grad_norm": 0.197265625, |
|
"learning_rate": 8.424611223799866e-06, |
|
"loss": 1.1214, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.4746450304259635, |
|
"grad_norm": 0.220703125, |
|
"learning_rate": 8.417849898580123e-06, |
|
"loss": 1.1316, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.4766734279918864, |
|
"grad_norm": 0.1875, |
|
"learning_rate": 8.41108857336038e-06, |
|
"loss": 1.1439, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.4787018255578093, |
|
"grad_norm": 0.205078125, |
|
"learning_rate": 8.404327248140635e-06, |
|
"loss": 1.1409, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.48073022312373226, |
|
"grad_norm": 0.2041015625, |
|
"learning_rate": 8.397565922920892e-06, |
|
"loss": 1.1773, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.4827586206896552, |
|
"grad_norm": 0.2265625, |
|
"learning_rate": 8.390804597701149e-06, |
|
"loss": 1.1738, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.4847870182555781, |
|
"grad_norm": 0.1865234375, |
|
"learning_rate": 8.384043272481408e-06, |
|
"loss": 1.1175, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.486815415821501, |
|
"grad_norm": 0.2001953125, |
|
"learning_rate": 8.377281947261665e-06, |
|
"loss": 1.1377, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.48884381338742394, |
|
"grad_norm": 0.296875, |
|
"learning_rate": 8.370520622041922e-06, |
|
"loss": 1.19, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.4908722109533469, |
|
"grad_norm": 0.2197265625, |
|
"learning_rate": 8.363759296822178e-06, |
|
"loss": 1.159, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.49290060851926976, |
|
"grad_norm": 0.21875, |
|
"learning_rate": 8.356997971602435e-06, |
|
"loss": 1.1575, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.4949290060851927, |
|
"grad_norm": 0.2080078125, |
|
"learning_rate": 8.35023664638269e-06, |
|
"loss": 1.1038, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.4969574036511156, |
|
"grad_norm": 0.19140625, |
|
"learning_rate": 8.343475321162948e-06, |
|
"loss": 1.1418, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.49898580121703856, |
|
"grad_norm": 0.2392578125, |
|
"learning_rate": 8.336713995943206e-06, |
|
"loss": 1.1345, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.5010141987829615, |
|
"grad_norm": 0.2177734375, |
|
"learning_rate": 8.329952670723463e-06, |
|
"loss": 1.14, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.5030425963488844, |
|
"grad_norm": 0.201171875, |
|
"learning_rate": 8.32319134550372e-06, |
|
"loss": 1.1234, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.5050709939148073, |
|
"grad_norm": 0.265625, |
|
"learning_rate": 8.316430020283977e-06, |
|
"loss": 1.1479, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.5070993914807302, |
|
"grad_norm": 0.205078125, |
|
"learning_rate": 8.309668695064234e-06, |
|
"loss": 1.1019, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5091277890466531, |
|
"grad_norm": 0.1865234375, |
|
"learning_rate": 8.30290736984449e-06, |
|
"loss": 1.1496, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.5111561866125761, |
|
"grad_norm": 0.23046875, |
|
"learning_rate": 8.296146044624746e-06, |
|
"loss": 1.1473, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.513184584178499, |
|
"grad_norm": 0.1962890625, |
|
"learning_rate": 8.289384719405005e-06, |
|
"loss": 1.153, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.5152129817444219, |
|
"grad_norm": 0.20703125, |
|
"learning_rate": 8.282623394185261e-06, |
|
"loss": 1.1607, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.5172413793103449, |
|
"grad_norm": 0.205078125, |
|
"learning_rate": 8.275862068965518e-06, |
|
"loss": 1.1626, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.5192697768762677, |
|
"grad_norm": 0.1962890625, |
|
"learning_rate": 8.269100743745775e-06, |
|
"loss": 1.1553, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.5212981744421906, |
|
"grad_norm": 0.2041015625, |
|
"learning_rate": 8.262339418526032e-06, |
|
"loss": 1.1893, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.5233265720081136, |
|
"grad_norm": 0.1943359375, |
|
"learning_rate": 8.255578093306289e-06, |
|
"loss": 1.0982, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.5253549695740365, |
|
"grad_norm": 0.2080078125, |
|
"learning_rate": 8.248816768086546e-06, |
|
"loss": 1.1448, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.5273833671399595, |
|
"grad_norm": 0.22265625, |
|
"learning_rate": 8.242055442866801e-06, |
|
"loss": 1.1399, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.5294117647058824, |
|
"grad_norm": 0.1845703125, |
|
"learning_rate": 8.23529411764706e-06, |
|
"loss": 1.0997, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.5314401622718052, |
|
"grad_norm": 0.2060546875, |
|
"learning_rate": 8.228532792427317e-06, |
|
"loss": 1.128, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.5334685598377282, |
|
"grad_norm": 0.1982421875, |
|
"learning_rate": 8.221771467207574e-06, |
|
"loss": 1.1279, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.5354969574036511, |
|
"grad_norm": 0.212890625, |
|
"learning_rate": 8.21501014198783e-06, |
|
"loss": 1.1596, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.537525354969574, |
|
"grad_norm": 0.203125, |
|
"learning_rate": 8.208248816768087e-06, |
|
"loss": 1.1058, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.539553752535497, |
|
"grad_norm": 0.1953125, |
|
"learning_rate": 8.201487491548344e-06, |
|
"loss": 1.125, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.5415821501014199, |
|
"grad_norm": 0.19921875, |
|
"learning_rate": 8.194726166328601e-06, |
|
"loss": 1.1207, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.5436105476673428, |
|
"grad_norm": 0.193359375, |
|
"learning_rate": 8.187964841108858e-06, |
|
"loss": 1.1092, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.5456389452332657, |
|
"grad_norm": 0.220703125, |
|
"learning_rate": 8.181203515889115e-06, |
|
"loss": 1.1074, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.5476673427991886, |
|
"grad_norm": 0.2001953125, |
|
"learning_rate": 8.174442190669372e-06, |
|
"loss": 1.0965, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.5496957403651116, |
|
"grad_norm": 0.21875, |
|
"learning_rate": 8.167680865449629e-06, |
|
"loss": 1.1247, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.5517241379310345, |
|
"grad_norm": 0.203125, |
|
"learning_rate": 8.160919540229886e-06, |
|
"loss": 1.1494, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.5537525354969574, |
|
"grad_norm": 0.1943359375, |
|
"learning_rate": 8.154158215010143e-06, |
|
"loss": 1.1079, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.5557809330628803, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 8.1473968897904e-06, |
|
"loss": 1.1407, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.5578093306288032, |
|
"grad_norm": 0.2001953125, |
|
"learning_rate": 8.140635564570657e-06, |
|
"loss": 1.1192, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.5598377281947262, |
|
"grad_norm": 0.1923828125, |
|
"learning_rate": 8.133874239350913e-06, |
|
"loss": 1.1077, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.5618661257606491, |
|
"grad_norm": 0.20703125, |
|
"learning_rate": 8.12711291413117e-06, |
|
"loss": 1.1626, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.563894523326572, |
|
"grad_norm": 0.203125, |
|
"learning_rate": 8.120351588911427e-06, |
|
"loss": 1.0602, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.565922920892495, |
|
"grad_norm": 0.23828125, |
|
"learning_rate": 8.113590263691684e-06, |
|
"loss": 1.0946, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.5679513184584178, |
|
"grad_norm": 0.2119140625, |
|
"learning_rate": 8.106828938471941e-06, |
|
"loss": 1.1575, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.5699797160243407, |
|
"grad_norm": 0.236328125, |
|
"learning_rate": 8.100067613252198e-06, |
|
"loss": 1.1317, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.5720081135902637, |
|
"grad_norm": 0.1962890625, |
|
"learning_rate": 8.093306288032455e-06, |
|
"loss": 1.1557, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.5740365111561866, |
|
"grad_norm": 0.205078125, |
|
"learning_rate": 8.086544962812712e-06, |
|
"loss": 1.1137, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.5760649087221096, |
|
"grad_norm": 0.22265625, |
|
"learning_rate": 8.079783637592969e-06, |
|
"loss": 1.1279, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.5780933062880325, |
|
"grad_norm": 0.2041015625, |
|
"learning_rate": 8.073022312373226e-06, |
|
"loss": 1.0888, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.5801217038539553, |
|
"grad_norm": 0.203125, |
|
"learning_rate": 8.066260987153483e-06, |
|
"loss": 1.1286, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.5821501014198783, |
|
"grad_norm": 0.201171875, |
|
"learning_rate": 8.05949966193374e-06, |
|
"loss": 1.1542, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.5841784989858012, |
|
"grad_norm": 0.2060546875, |
|
"learning_rate": 8.052738336713996e-06, |
|
"loss": 1.1188, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.5862068965517241, |
|
"grad_norm": 0.2080078125, |
|
"learning_rate": 8.045977011494253e-06, |
|
"loss": 1.1265, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.5882352941176471, |
|
"grad_norm": 0.2177734375, |
|
"learning_rate": 8.03921568627451e-06, |
|
"loss": 1.1099, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.59026369168357, |
|
"grad_norm": 0.279296875, |
|
"learning_rate": 8.032454361054767e-06, |
|
"loss": 1.088, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.592292089249493, |
|
"grad_norm": 0.1953125, |
|
"learning_rate": 8.025693035835024e-06, |
|
"loss": 1.1426, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.5943204868154158, |
|
"grad_norm": 0.236328125, |
|
"learning_rate": 8.018931710615281e-06, |
|
"loss": 1.099, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.5963488843813387, |
|
"grad_norm": 0.259765625, |
|
"learning_rate": 8.012170385395538e-06, |
|
"loss": 1.1593, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.5983772819472617, |
|
"grad_norm": 0.208984375, |
|
"learning_rate": 8.005409060175795e-06, |
|
"loss": 1.1478, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.6004056795131846, |
|
"grad_norm": 0.26171875, |
|
"learning_rate": 7.998647734956052e-06, |
|
"loss": 1.1165, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.6024340770791075, |
|
"grad_norm": 0.2109375, |
|
"learning_rate": 7.991886409736309e-06, |
|
"loss": 1.0772, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.6044624746450304, |
|
"grad_norm": 0.2099609375, |
|
"learning_rate": 7.985125084516566e-06, |
|
"loss": 1.0881, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.6064908722109533, |
|
"grad_norm": 0.2021484375, |
|
"learning_rate": 7.978363759296822e-06, |
|
"loss": 1.0898, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.6085192697768763, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 7.97160243407708e-06, |
|
"loss": 1.095, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6105476673427992, |
|
"grad_norm": 0.20703125, |
|
"learning_rate": 7.964841108857336e-06, |
|
"loss": 1.1129, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.6125760649087221, |
|
"grad_norm": 0.220703125, |
|
"learning_rate": 7.958079783637593e-06, |
|
"loss": 1.1106, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.6146044624746451, |
|
"grad_norm": 0.2001953125, |
|
"learning_rate": 7.95131845841785e-06, |
|
"loss": 1.0957, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.6166328600405679, |
|
"grad_norm": 0.296875, |
|
"learning_rate": 7.944557133198107e-06, |
|
"loss": 1.1653, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.6186612576064908, |
|
"grad_norm": 0.2119140625, |
|
"learning_rate": 7.937795807978364e-06, |
|
"loss": 1.13, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.6206896551724138, |
|
"grad_norm": 0.205078125, |
|
"learning_rate": 7.93103448275862e-06, |
|
"loss": 1.1194, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.6227180527383367, |
|
"grad_norm": 0.232421875, |
|
"learning_rate": 7.92427315753888e-06, |
|
"loss": 1.1271, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.6247464503042597, |
|
"grad_norm": 0.20703125, |
|
"learning_rate": 7.917511832319135e-06, |
|
"loss": 1.1328, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.6267748478701826, |
|
"grad_norm": 0.26953125, |
|
"learning_rate": 7.910750507099392e-06, |
|
"loss": 1.1269, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.6288032454361054, |
|
"grad_norm": 0.2041015625, |
|
"learning_rate": 7.903989181879648e-06, |
|
"loss": 1.094, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.6308316430020284, |
|
"grad_norm": 0.232421875, |
|
"learning_rate": 7.897227856659905e-06, |
|
"loss": 1.0956, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.6328600405679513, |
|
"grad_norm": 0.20703125, |
|
"learning_rate": 7.890466531440162e-06, |
|
"loss": 1.0989, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.6348884381338742, |
|
"grad_norm": 0.216796875, |
|
"learning_rate": 7.88370520622042e-06, |
|
"loss": 1.1115, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.6369168356997972, |
|
"grad_norm": 0.208984375, |
|
"learning_rate": 7.876943881000678e-06, |
|
"loss": 1.1377, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.6389452332657201, |
|
"grad_norm": 0.21875, |
|
"learning_rate": 7.870182555780935e-06, |
|
"loss": 1.0829, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.640973630831643, |
|
"grad_norm": 0.19921875, |
|
"learning_rate": 7.86342123056119e-06, |
|
"loss": 1.0775, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.6430020283975659, |
|
"grad_norm": 0.21875, |
|
"learning_rate": 7.856659905341447e-06, |
|
"loss": 1.058, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.6450304259634888, |
|
"grad_norm": 0.265625, |
|
"learning_rate": 7.849898580121704e-06, |
|
"loss": 1.0845, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.6470588235294118, |
|
"grad_norm": 0.220703125, |
|
"learning_rate": 7.84313725490196e-06, |
|
"loss": 1.1295, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.6490872210953347, |
|
"grad_norm": 0.2216796875, |
|
"learning_rate": 7.836375929682218e-06, |
|
"loss": 1.1128, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.6511156186612576, |
|
"grad_norm": 0.2275390625, |
|
"learning_rate": 7.829614604462476e-06, |
|
"loss": 1.0901, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.6531440162271805, |
|
"grad_norm": 0.197265625, |
|
"learning_rate": 7.822853279242733e-06, |
|
"loss": 1.0869, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.6551724137931034, |
|
"grad_norm": 0.2099609375, |
|
"learning_rate": 7.81609195402299e-06, |
|
"loss": 1.1382, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.6572008113590264, |
|
"grad_norm": 0.26953125, |
|
"learning_rate": 7.809330628803245e-06, |
|
"loss": 1.0594, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.6592292089249493, |
|
"grad_norm": 0.205078125, |
|
"learning_rate": 7.802569303583502e-06, |
|
"loss": 1.1196, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.6612576064908722, |
|
"grad_norm": 0.212890625, |
|
"learning_rate": 7.795807978363759e-06, |
|
"loss": 1.0247, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.6632860040567952, |
|
"grad_norm": 0.2119140625, |
|
"learning_rate": 7.789046653144016e-06, |
|
"loss": 1.1146, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.665314401622718, |
|
"grad_norm": 0.2109375, |
|
"learning_rate": 7.782285327924275e-06, |
|
"loss": 1.0828, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.6673427991886409, |
|
"grad_norm": 0.2080078125, |
|
"learning_rate": 7.775524002704531e-06, |
|
"loss": 1.0726, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.6693711967545639, |
|
"grad_norm": 0.3984375, |
|
"learning_rate": 7.768762677484788e-06, |
|
"loss": 1.1187, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.6713995943204868, |
|
"grad_norm": 0.212890625, |
|
"learning_rate": 7.762001352265045e-06, |
|
"loss": 1.118, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.6734279918864098, |
|
"grad_norm": 0.23046875, |
|
"learning_rate": 7.7552400270453e-06, |
|
"loss": 1.1304, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.6754563894523327, |
|
"grad_norm": 0.2041015625, |
|
"learning_rate": 7.748478701825557e-06, |
|
"loss": 1.0665, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.6774847870182555, |
|
"grad_norm": 0.2314453125, |
|
"learning_rate": 7.741717376605814e-06, |
|
"loss": 1.1892, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.6795131845841785, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 7.734956051386073e-06, |
|
"loss": 1.1124, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.6815415821501014, |
|
"grad_norm": 0.2216796875, |
|
"learning_rate": 7.72819472616633e-06, |
|
"loss": 1.1264, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.6835699797160243, |
|
"grad_norm": 0.2255859375, |
|
"learning_rate": 7.721433400946587e-06, |
|
"loss": 1.1204, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.6855983772819473, |
|
"grad_norm": 0.298828125, |
|
"learning_rate": 7.714672075726844e-06, |
|
"loss": 1.0857, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.6876267748478702, |
|
"grad_norm": 0.220703125, |
|
"learning_rate": 7.7079107505071e-06, |
|
"loss": 1.0994, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.6896551724137931, |
|
"grad_norm": 0.2216796875, |
|
"learning_rate": 7.701149425287356e-06, |
|
"loss": 1.1221, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.691683569979716, |
|
"grad_norm": 0.216796875, |
|
"learning_rate": 7.694388100067613e-06, |
|
"loss": 1.1262, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.6937119675456389, |
|
"grad_norm": 0.20703125, |
|
"learning_rate": 7.687626774847871e-06, |
|
"loss": 1.1352, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.6957403651115619, |
|
"grad_norm": 0.2021484375, |
|
"learning_rate": 7.680865449628128e-06, |
|
"loss": 1.097, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.6977687626774848, |
|
"grad_norm": 0.21484375, |
|
"learning_rate": 7.674104124408385e-06, |
|
"loss": 1.1079, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.6997971602434077, |
|
"grad_norm": 0.220703125, |
|
"learning_rate": 7.667342799188642e-06, |
|
"loss": 1.0598, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.7018255578093306, |
|
"grad_norm": 0.25, |
|
"learning_rate": 7.660581473968899e-06, |
|
"loss": 1.1034, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.7038539553752535, |
|
"grad_norm": 0.22265625, |
|
"learning_rate": 7.653820148749156e-06, |
|
"loss": 1.0847, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.7058823529411765, |
|
"grad_norm": 0.24609375, |
|
"learning_rate": 7.647058823529411e-06, |
|
"loss": 1.0882, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.7079107505070994, |
|
"grad_norm": 0.80078125, |
|
"learning_rate": 7.64029749830967e-06, |
|
"loss": 1.1341, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.7099391480730223, |
|
"grad_norm": 0.212890625, |
|
"learning_rate": 7.633536173089927e-06, |
|
"loss": 1.0863, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.7119675456389453, |
|
"grad_norm": 0.2109375, |
|
"learning_rate": 7.626774847870183e-06, |
|
"loss": 1.0326, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.7139959432048681, |
|
"grad_norm": 0.2333984375, |
|
"learning_rate": 7.6200135226504404e-06, |
|
"loss": 1.0598, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.716024340770791, |
|
"grad_norm": 0.2119140625, |
|
"learning_rate": 7.613252197430697e-06, |
|
"loss": 1.0759, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.718052738336714, |
|
"grad_norm": 0.2294921875, |
|
"learning_rate": 7.606490872210954e-06, |
|
"loss": 1.116, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.7200811359026369, |
|
"grad_norm": 0.21875, |
|
"learning_rate": 7.599729546991211e-06, |
|
"loss": 1.1235, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.7221095334685599, |
|
"grad_norm": 0.2578125, |
|
"learning_rate": 7.592968221771467e-06, |
|
"loss": 1.1006, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.7241379310344828, |
|
"grad_norm": 0.216796875, |
|
"learning_rate": 7.586206896551724e-06, |
|
"loss": 1.1092, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.7261663286004056, |
|
"grad_norm": 0.248046875, |
|
"learning_rate": 7.579445571331981e-06, |
|
"loss": 1.0582, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.7281947261663286, |
|
"grad_norm": 0.2275390625, |
|
"learning_rate": 7.572684246112239e-06, |
|
"loss": 1.1501, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.7302231237322515, |
|
"grad_norm": 0.2158203125, |
|
"learning_rate": 7.565922920892496e-06, |
|
"loss": 1.1079, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.7322515212981744, |
|
"grad_norm": 0.251953125, |
|
"learning_rate": 7.559161595672753e-06, |
|
"loss": 1.0929, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.7342799188640974, |
|
"grad_norm": 0.279296875, |
|
"learning_rate": 7.5524002704530095e-06, |
|
"loss": 1.0586, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.7363083164300203, |
|
"grad_norm": 0.2119140625, |
|
"learning_rate": 7.5456389452332665e-06, |
|
"loss": 1.1174, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.7383367139959433, |
|
"grad_norm": 0.2255859375, |
|
"learning_rate": 7.5388776200135225e-06, |
|
"loss": 1.1049, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.7403651115618661, |
|
"grad_norm": 0.271484375, |
|
"learning_rate": 7.5321162947937794e-06, |
|
"loss": 1.1212, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.742393509127789, |
|
"grad_norm": 0.2177734375, |
|
"learning_rate": 7.525354969574037e-06, |
|
"loss": 1.1117, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.744421906693712, |
|
"grad_norm": 0.2490234375, |
|
"learning_rate": 7.518593644354294e-06, |
|
"loss": 1.0885, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.7464503042596349, |
|
"grad_norm": 0.224609375, |
|
"learning_rate": 7.511832319134551e-06, |
|
"loss": 1.1178, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.7484787018255578, |
|
"grad_norm": 0.21484375, |
|
"learning_rate": 7.505070993914808e-06, |
|
"loss": 1.0965, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.7505070993914807, |
|
"grad_norm": 0.21875, |
|
"learning_rate": 7.498309668695065e-06, |
|
"loss": 1.0961, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.7525354969574036, |
|
"grad_norm": 0.369140625, |
|
"learning_rate": 7.491548343475323e-06, |
|
"loss": 1.0683, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.7545638945233266, |
|
"grad_norm": 0.2109375, |
|
"learning_rate": 7.484787018255578e-06, |
|
"loss": 1.0914, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.7565922920892495, |
|
"grad_norm": 0.216796875, |
|
"learning_rate": 7.4780256930358356e-06, |
|
"loss": 1.0738, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.7586206896551724, |
|
"grad_norm": 0.1982421875, |
|
"learning_rate": 7.4712643678160925e-06, |
|
"loss": 1.0488, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.7606490872210954, |
|
"grad_norm": 0.2216796875, |
|
"learning_rate": 7.464503042596349e-06, |
|
"loss": 1.1491, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.7626774847870182, |
|
"grad_norm": 0.22265625, |
|
"learning_rate": 7.457741717376606e-06, |
|
"loss": 1.095, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.7647058823529411, |
|
"grad_norm": 0.2177734375, |
|
"learning_rate": 7.450980392156863e-06, |
|
"loss": 1.112, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.7667342799188641, |
|
"grad_norm": 0.216796875, |
|
"learning_rate": 7.444219066937121e-06, |
|
"loss": 1.0972, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.768762677484787, |
|
"grad_norm": 0.26953125, |
|
"learning_rate": 7.437457741717378e-06, |
|
"loss": 1.1215, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.77079107505071, |
|
"grad_norm": 0.216796875, |
|
"learning_rate": 7.430696416497634e-06, |
|
"loss": 1.1147, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.7728194726166329, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 7.423935091277891e-06, |
|
"loss": 1.0614, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.7748478701825557, |
|
"grad_norm": 0.2197265625, |
|
"learning_rate": 7.417173766058148e-06, |
|
"loss": 1.1024, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.7768762677484787, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 7.410412440838405e-06, |
|
"loss": 1.0603, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.7789046653144016, |
|
"grad_norm": 0.2255859375, |
|
"learning_rate": 7.403651115618662e-06, |
|
"loss": 1.1042, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.7809330628803245, |
|
"grad_norm": 0.2314453125, |
|
"learning_rate": 7.396889790398919e-06, |
|
"loss": 1.1058, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.7829614604462475, |
|
"grad_norm": 0.2158203125, |
|
"learning_rate": 7.390128465179176e-06, |
|
"loss": 1.1207, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.7849898580121704, |
|
"grad_norm": 0.359375, |
|
"learning_rate": 7.383367139959433e-06, |
|
"loss": 1.0499, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.7870182555780934, |
|
"grad_norm": 0.2119140625, |
|
"learning_rate": 7.376605814739689e-06, |
|
"loss": 1.1111, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.7890466531440162, |
|
"grad_norm": 0.2216796875, |
|
"learning_rate": 7.369844489519946e-06, |
|
"loss": 1.1469, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.7910750507099391, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 7.363083164300203e-06, |
|
"loss": 1.0579, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.7931034482758621, |
|
"grad_norm": 0.2431640625, |
|
"learning_rate": 7.35632183908046e-06, |
|
"loss": 1.0904, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.795131845841785, |
|
"grad_norm": 0.208984375, |
|
"learning_rate": 7.349560513860718e-06, |
|
"loss": 1.1116, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.7971602434077079, |
|
"grad_norm": 0.236328125, |
|
"learning_rate": 7.342799188640975e-06, |
|
"loss": 1.0659, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.7991886409736308, |
|
"grad_norm": 0.2275390625, |
|
"learning_rate": 7.3360378634212316e-06, |
|
"loss": 1.0893, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.8012170385395537, |
|
"grad_norm": 0.23828125, |
|
"learning_rate": 7.3292765382014885e-06, |
|
"loss": 1.0927, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.8032454361054767, |
|
"grad_norm": 0.2021484375, |
|
"learning_rate": 7.3225152129817445e-06, |
|
"loss": 1.0579, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.8052738336713996, |
|
"grad_norm": 0.2080078125, |
|
"learning_rate": 7.3157538877620015e-06, |
|
"loss": 1.0357, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.8073022312373225, |
|
"grad_norm": 0.2158203125, |
|
"learning_rate": 7.308992562542258e-06, |
|
"loss": 1.0872, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.8093306288032455, |
|
"grad_norm": 0.2119140625, |
|
"learning_rate": 7.302231237322516e-06, |
|
"loss": 1.0981, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.8113590263691683, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 7.295469912102773e-06, |
|
"loss": 1.1117, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8133874239350912, |
|
"grad_norm": 0.2197265625, |
|
"learning_rate": 7.28870858688303e-06, |
|
"loss": 1.0784, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.8154158215010142, |
|
"grad_norm": 0.265625, |
|
"learning_rate": 7.281947261663287e-06, |
|
"loss": 1.1133, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.8174442190669371, |
|
"grad_norm": 0.2470703125, |
|
"learning_rate": 7.275185936443544e-06, |
|
"loss": 1.0285, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.8194726166328601, |
|
"grad_norm": 0.23828125, |
|
"learning_rate": 7.2684246112238e-06, |
|
"loss": 1.0703, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.821501014198783, |
|
"grad_norm": 0.2353515625, |
|
"learning_rate": 7.261663286004057e-06, |
|
"loss": 1.091, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.8235294117647058, |
|
"grad_norm": 0.2216796875, |
|
"learning_rate": 7.2549019607843145e-06, |
|
"loss": 1.0706, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.8255578093306288, |
|
"grad_norm": 0.25, |
|
"learning_rate": 7.248140635564571e-06, |
|
"loss": 1.0761, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.8275862068965517, |
|
"grad_norm": 0.23828125, |
|
"learning_rate": 7.241379310344828e-06, |
|
"loss": 1.1163, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.8296146044624746, |
|
"grad_norm": 0.2158203125, |
|
"learning_rate": 7.234617985125085e-06, |
|
"loss": 1.1036, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.8316430020283976, |
|
"grad_norm": 0.22265625, |
|
"learning_rate": 7.227856659905342e-06, |
|
"loss": 1.096, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.8336713995943205, |
|
"grad_norm": 0.220703125, |
|
"learning_rate": 7.221095334685599e-06, |
|
"loss": 1.0663, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.8356997971602435, |
|
"grad_norm": 0.228515625, |
|
"learning_rate": 7.214334009465855e-06, |
|
"loss": 1.119, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.8377281947261663, |
|
"grad_norm": 0.22265625, |
|
"learning_rate": 7.207572684246112e-06, |
|
"loss": 1.1057, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.8397565922920892, |
|
"grad_norm": 0.23046875, |
|
"learning_rate": 7.20081135902637e-06, |
|
"loss": 1.0777, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.8417849898580122, |
|
"grad_norm": 0.2275390625, |
|
"learning_rate": 7.194050033806627e-06, |
|
"loss": 1.1091, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.8438133874239351, |
|
"grad_norm": 0.2353515625, |
|
"learning_rate": 7.187288708586884e-06, |
|
"loss": 1.055, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.845841784989858, |
|
"grad_norm": 0.2109375, |
|
"learning_rate": 7.1805273833671405e-06, |
|
"loss": 1.1213, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.847870182555781, |
|
"grad_norm": 0.2314453125, |
|
"learning_rate": 7.1737660581473974e-06, |
|
"loss": 1.0516, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.8498985801217038, |
|
"grad_norm": 0.2236328125, |
|
"learning_rate": 7.167004732927655e-06, |
|
"loss": 1.1068, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.8519269776876268, |
|
"grad_norm": 0.232421875, |
|
"learning_rate": 7.16024340770791e-06, |
|
"loss": 1.1144, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.8539553752535497, |
|
"grad_norm": 0.2216796875, |
|
"learning_rate": 7.153482082488168e-06, |
|
"loss": 1.0661, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.8559837728194726, |
|
"grad_norm": 0.27734375, |
|
"learning_rate": 7.146720757268425e-06, |
|
"loss": 1.1194, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.8580121703853956, |
|
"grad_norm": 0.2255859375, |
|
"learning_rate": 7.139959432048682e-06, |
|
"loss": 1.0732, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.8600405679513184, |
|
"grad_norm": 0.2216796875, |
|
"learning_rate": 7.133198106828939e-06, |
|
"loss": 1.0919, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.8620689655172413, |
|
"grad_norm": 0.2265625, |
|
"learning_rate": 7.126436781609196e-06, |
|
"loss": 1.0936, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.8640973630831643, |
|
"grad_norm": 0.349609375, |
|
"learning_rate": 7.119675456389454e-06, |
|
"loss": 1.1049, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.8661257606490872, |
|
"grad_norm": 0.228515625, |
|
"learning_rate": 7.1129141311697105e-06, |
|
"loss": 1.1039, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.8681541582150102, |
|
"grad_norm": 0.296875, |
|
"learning_rate": 7.1061528059499666e-06, |
|
"loss": 1.034, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.8701825557809331, |
|
"grad_norm": 0.2314453125, |
|
"learning_rate": 7.0993914807302235e-06, |
|
"loss": 1.1177, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.8722109533468559, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 7.09263015551048e-06, |
|
"loss": 1.0703, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.8742393509127789, |
|
"grad_norm": 0.21875, |
|
"learning_rate": 7.085868830290737e-06, |
|
"loss": 1.0331, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.8762677484787018, |
|
"grad_norm": 0.220703125, |
|
"learning_rate": 7.079107505070994e-06, |
|
"loss": 1.0849, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.8782961460446247, |
|
"grad_norm": 0.2265625, |
|
"learning_rate": 7.072346179851252e-06, |
|
"loss": 1.0612, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.8803245436105477, |
|
"grad_norm": 0.2177734375, |
|
"learning_rate": 7.065584854631509e-06, |
|
"loss": 1.0692, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.8823529411764706, |
|
"grad_norm": 0.2197265625, |
|
"learning_rate": 7.058823529411766e-06, |
|
"loss": 1.0678, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.8843813387423936, |
|
"grad_norm": 0.2216796875, |
|
"learning_rate": 7.052062204192022e-06, |
|
"loss": 1.0662, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.8864097363083164, |
|
"grad_norm": 0.2158203125, |
|
"learning_rate": 7.045300878972279e-06, |
|
"loss": 1.08, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.8884381338742393, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 7.038539553752536e-06, |
|
"loss": 1.1165, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.8904665314401623, |
|
"grad_norm": 0.2216796875, |
|
"learning_rate": 7.031778228532793e-06, |
|
"loss": 1.0933, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.8924949290060852, |
|
"grad_norm": 0.228515625, |
|
"learning_rate": 7.02501690331305e-06, |
|
"loss": 1.1124, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.8945233265720081, |
|
"grad_norm": 0.2314453125, |
|
"learning_rate": 7.018255578093307e-06, |
|
"loss": 1.1189, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.896551724137931, |
|
"grad_norm": 0.228515625, |
|
"learning_rate": 7.011494252873564e-06, |
|
"loss": 1.0761, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.8985801217038539, |
|
"grad_norm": 0.2294921875, |
|
"learning_rate": 7.004732927653821e-06, |
|
"loss": 1.0929, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.9006085192697769, |
|
"grad_norm": 0.2392578125, |
|
"learning_rate": 6.997971602434077e-06, |
|
"loss": 1.0499, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.9026369168356998, |
|
"grad_norm": 0.3515625, |
|
"learning_rate": 6.991210277214334e-06, |
|
"loss": 1.0665, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.9046653144016227, |
|
"grad_norm": 0.2216796875, |
|
"learning_rate": 6.984448951994591e-06, |
|
"loss": 1.0624, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.9066937119675457, |
|
"grad_norm": 0.240234375, |
|
"learning_rate": 6.977687626774849e-06, |
|
"loss": 1.1199, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.9087221095334685, |
|
"grad_norm": 0.28125, |
|
"learning_rate": 6.970926301555106e-06, |
|
"loss": 1.0736, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.9107505070993914, |
|
"grad_norm": 0.228515625, |
|
"learning_rate": 6.9641649763353625e-06, |
|
"loss": 1.1005, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.9127789046653144, |
|
"grad_norm": 0.248046875, |
|
"learning_rate": 6.9574036511156195e-06, |
|
"loss": 1.1016, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.9148073022312373, |
|
"grad_norm": 0.337890625, |
|
"learning_rate": 6.950642325895876e-06, |
|
"loss": 1.0351, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.9168356997971603, |
|
"grad_norm": 0.30078125, |
|
"learning_rate": 6.9438810006761324e-06, |
|
"loss": 1.055, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.9188640973630832, |
|
"grad_norm": 0.2177734375, |
|
"learning_rate": 6.937119675456389e-06, |
|
"loss": 1.0669, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.920892494929006, |
|
"grad_norm": 0.220703125, |
|
"learning_rate": 6.930358350236647e-06, |
|
"loss": 1.1177, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.922920892494929, |
|
"grad_norm": 0.2294921875, |
|
"learning_rate": 6.923597025016904e-06, |
|
"loss": 1.1167, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.9249492900608519, |
|
"grad_norm": 0.2578125, |
|
"learning_rate": 6.916835699797161e-06, |
|
"loss": 1.0982, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.9269776876267748, |
|
"grad_norm": 0.2421875, |
|
"learning_rate": 6.910074374577418e-06, |
|
"loss": 1.0828, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.9290060851926978, |
|
"grad_norm": 0.228515625, |
|
"learning_rate": 6.903313049357675e-06, |
|
"loss": 1.0629, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.9310344827586207, |
|
"grad_norm": 0.625, |
|
"learning_rate": 6.896551724137932e-06, |
|
"loss": 1.107, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.9330628803245437, |
|
"grad_norm": 0.23828125, |
|
"learning_rate": 6.889790398918188e-06, |
|
"loss": 1.0501, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.9350912778904665, |
|
"grad_norm": 0.2333984375, |
|
"learning_rate": 6.8830290736984455e-06, |
|
"loss": 1.0976, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.9371196754563894, |
|
"grad_norm": 0.259765625, |
|
"learning_rate": 6.876267748478702e-06, |
|
"loss": 1.1372, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.9391480730223124, |
|
"grad_norm": 0.2197265625, |
|
"learning_rate": 6.869506423258959e-06, |
|
"loss": 1.0403, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.9411764705882353, |
|
"grad_norm": 0.2216796875, |
|
"learning_rate": 6.862745098039216e-06, |
|
"loss": 1.0414, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.9432048681541582, |
|
"grad_norm": 0.22265625, |
|
"learning_rate": 6.855983772819473e-06, |
|
"loss": 1.104, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.9452332657200812, |
|
"grad_norm": 0.234375, |
|
"learning_rate": 6.84922244759973e-06, |
|
"loss": 1.1283, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.947261663286004, |
|
"grad_norm": 0.234375, |
|
"learning_rate": 6.842461122379988e-06, |
|
"loss": 1.0765, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.949290060851927, |
|
"grad_norm": 0.279296875, |
|
"learning_rate": 6.835699797160244e-06, |
|
"loss": 1.0378, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.9513184584178499, |
|
"grad_norm": 0.234375, |
|
"learning_rate": 6.828938471940501e-06, |
|
"loss": 1.1038, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.9533468559837728, |
|
"grad_norm": 0.234375, |
|
"learning_rate": 6.822177146720758e-06, |
|
"loss": 1.0895, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.9553752535496958, |
|
"grad_norm": 0.2451171875, |
|
"learning_rate": 6.815415821501015e-06, |
|
"loss": 1.0341, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.9574036511156186, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 6.8086544962812715e-06, |
|
"loss": 1.0862, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.9594320486815415, |
|
"grad_norm": 0.2421875, |
|
"learning_rate": 6.801893171061528e-06, |
|
"loss": 1.1023, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.9614604462474645, |
|
"grad_norm": 0.23046875, |
|
"learning_rate": 6.795131845841786e-06, |
|
"loss": 1.0123, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.9634888438133874, |
|
"grad_norm": 0.228515625, |
|
"learning_rate": 6.788370520622043e-06, |
|
"loss": 1.1256, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.9655172413793104, |
|
"grad_norm": 0.2353515625, |
|
"learning_rate": 6.781609195402299e-06, |
|
"loss": 1.0688, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.9675456389452333, |
|
"grad_norm": 0.2373046875, |
|
"learning_rate": 6.774847870182556e-06, |
|
"loss": 1.1208, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.9695740365111561, |
|
"grad_norm": 0.23828125, |
|
"learning_rate": 6.768086544962813e-06, |
|
"loss": 1.087, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.9716024340770791, |
|
"grad_norm": 0.2490234375, |
|
"learning_rate": 6.76132521974307e-06, |
|
"loss": 1.099, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.973630831643002, |
|
"grad_norm": 0.2490234375, |
|
"learning_rate": 6.754563894523327e-06, |
|
"loss": 1.1076, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.9756592292089249, |
|
"grad_norm": 0.2255859375, |
|
"learning_rate": 6.7478025693035846e-06, |
|
"loss": 1.0634, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.9776876267748479, |
|
"grad_norm": 0.22265625, |
|
"learning_rate": 6.7410412440838415e-06, |
|
"loss": 1.0835, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.9797160243407708, |
|
"grad_norm": 0.2353515625, |
|
"learning_rate": 6.734279918864098e-06, |
|
"loss": 1.0837, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.9817444219066938, |
|
"grad_norm": 0.228515625, |
|
"learning_rate": 6.7275185936443544e-06, |
|
"loss": 1.0663, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.9837728194726166, |
|
"grad_norm": 0.2255859375, |
|
"learning_rate": 6.720757268424611e-06, |
|
"loss": 1.0664, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.9858012170385395, |
|
"grad_norm": 0.2490234375, |
|
"learning_rate": 6.713995943204868e-06, |
|
"loss": 1.0677, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.9878296146044625, |
|
"grad_norm": 0.2216796875, |
|
"learning_rate": 6.707234617985125e-06, |
|
"loss": 1.0646, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.9898580121703854, |
|
"grad_norm": 0.2255859375, |
|
"learning_rate": 6.700473292765383e-06, |
|
"loss": 1.0106, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.9918864097363083, |
|
"grad_norm": 0.2265625, |
|
"learning_rate": 6.69371196754564e-06, |
|
"loss": 1.1186, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.9939148073022313, |
|
"grad_norm": 0.287109375, |
|
"learning_rate": 6.686950642325897e-06, |
|
"loss": 1.0706, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.9959432048681541, |
|
"grad_norm": 0.2451171875, |
|
"learning_rate": 6.680189317106154e-06, |
|
"loss": 1.065, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.9979716024340771, |
|
"grad_norm": 0.2421875, |
|
"learning_rate": 6.67342799188641e-06, |
|
"loss": 1.0931, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.244140625, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 1.0794, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 1.002028397565923, |
|
"grad_norm": 0.28125, |
|
"learning_rate": 6.6599053414469236e-06, |
|
"loss": 1.087, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 1.0040567951318458, |
|
"grad_norm": 0.23828125, |
|
"learning_rate": 6.653144016227181e-06, |
|
"loss": 1.0326, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 1.0060851926977687, |
|
"grad_norm": 0.248046875, |
|
"learning_rate": 6.646382691007438e-06, |
|
"loss": 1.056, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 1.0081135902636917, |
|
"grad_norm": 0.2255859375, |
|
"learning_rate": 6.639621365787695e-06, |
|
"loss": 1.0488, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 1.0101419878296145, |
|
"grad_norm": 0.2275390625, |
|
"learning_rate": 6.632860040567952e-06, |
|
"loss": 1.0683, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 1.0121703853955375, |
|
"grad_norm": 0.294921875, |
|
"learning_rate": 6.626098715348209e-06, |
|
"loss": 1.0659, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 1.0141987829614605, |
|
"grad_norm": 0.2333984375, |
|
"learning_rate": 6.619337390128465e-06, |
|
"loss": 1.0606, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.0162271805273835, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 6.612576064908722e-06, |
|
"loss": 1.0074, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 1.0182555780933062, |
|
"grad_norm": 0.283203125, |
|
"learning_rate": 6.60581473968898e-06, |
|
"loss": 1.0893, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 1.0202839756592292, |
|
"grad_norm": 0.22265625, |
|
"learning_rate": 6.599053414469237e-06, |
|
"loss": 1.079, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 1.0223123732251522, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 6.5922920892494935e-06, |
|
"loss": 1.0616, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 1.024340770791075, |
|
"grad_norm": 0.2333984375, |
|
"learning_rate": 6.5855307640297504e-06, |
|
"loss": 1.0783, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 1.026369168356998, |
|
"grad_norm": 0.2314453125, |
|
"learning_rate": 6.578769438810007e-06, |
|
"loss": 1.0645, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 1.028397565922921, |
|
"grad_norm": 0.2490234375, |
|
"learning_rate": 6.572008113590265e-06, |
|
"loss": 1.0546, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 1.0304259634888437, |
|
"grad_norm": 0.2333984375, |
|
"learning_rate": 6.56524678837052e-06, |
|
"loss": 1.088, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 1.0324543610547667, |
|
"grad_norm": 0.2412109375, |
|
"learning_rate": 6.558485463150778e-06, |
|
"loss": 1.1028, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 1.0344827586206897, |
|
"grad_norm": 0.2314453125, |
|
"learning_rate": 6.551724137931035e-06, |
|
"loss": 1.0477, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.0365111561866125, |
|
"grad_norm": 0.2294921875, |
|
"learning_rate": 6.544962812711292e-06, |
|
"loss": 1.0453, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 1.0385395537525355, |
|
"grad_norm": 0.2275390625, |
|
"learning_rate": 6.538201487491549e-06, |
|
"loss": 1.0321, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 1.0405679513184585, |
|
"grad_norm": 0.2314453125, |
|
"learning_rate": 6.531440162271806e-06, |
|
"loss": 1.0569, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 1.0425963488843812, |
|
"grad_norm": 0.234375, |
|
"learning_rate": 6.5246788370520635e-06, |
|
"loss": 1.0619, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 1.0446247464503042, |
|
"grad_norm": 0.251953125, |
|
"learning_rate": 6.51791751183232e-06, |
|
"loss": 1.0559, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 1.0466531440162272, |
|
"grad_norm": 0.2431640625, |
|
"learning_rate": 6.5111561866125765e-06, |
|
"loss": 1.0531, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 1.04868154158215, |
|
"grad_norm": 0.22265625, |
|
"learning_rate": 6.504394861392833e-06, |
|
"loss": 1.0724, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 1.050709939148073, |
|
"grad_norm": 0.2451171875, |
|
"learning_rate": 6.49763353617309e-06, |
|
"loss": 1.0478, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 1.052738336713996, |
|
"grad_norm": 0.21875, |
|
"learning_rate": 6.490872210953347e-06, |
|
"loss": 1.0659, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 1.054766734279919, |
|
"grad_norm": 0.2216796875, |
|
"learning_rate": 6.484110885733604e-06, |
|
"loss": 1.0672, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.0567951318458417, |
|
"grad_norm": 0.2734375, |
|
"learning_rate": 6.477349560513861e-06, |
|
"loss": 1.0845, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 1.0588235294117647, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 6.470588235294119e-06, |
|
"loss": 1.0633, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 1.0608519269776877, |
|
"grad_norm": 0.26171875, |
|
"learning_rate": 6.463826910074376e-06, |
|
"loss": 1.1189, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 1.0628803245436105, |
|
"grad_norm": 0.296875, |
|
"learning_rate": 6.457065584854632e-06, |
|
"loss": 1.0884, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 1.0649087221095335, |
|
"grad_norm": 0.2255859375, |
|
"learning_rate": 6.450304259634889e-06, |
|
"loss": 1.0337, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 1.0669371196754565, |
|
"grad_norm": 0.2255859375, |
|
"learning_rate": 6.4435429344151456e-06, |
|
"loss": 1.0333, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 1.0689655172413792, |
|
"grad_norm": 0.2333984375, |
|
"learning_rate": 6.4367816091954025e-06, |
|
"loss": 1.0785, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 1.0709939148073022, |
|
"grad_norm": 0.2255859375, |
|
"learning_rate": 6.430020283975659e-06, |
|
"loss": 1.0, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 1.0730223123732252, |
|
"grad_norm": 0.2392578125, |
|
"learning_rate": 6.423258958755917e-06, |
|
"loss": 1.0664, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 1.075050709939148, |
|
"grad_norm": 0.29296875, |
|
"learning_rate": 6.416497633536174e-06, |
|
"loss": 1.1181, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.077079107505071, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 6.409736308316431e-06, |
|
"loss": 1.0471, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 1.079107505070994, |
|
"grad_norm": 0.2421875, |
|
"learning_rate": 6.402974983096687e-06, |
|
"loss": 1.0518, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 1.081135902636917, |
|
"grad_norm": 0.2470703125, |
|
"learning_rate": 6.396213657876944e-06, |
|
"loss": 1.0784, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 1.0831643002028397, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 6.389452332657201e-06, |
|
"loss": 1.0695, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 1.0851926977687627, |
|
"grad_norm": 0.234375, |
|
"learning_rate": 6.382691007437458e-06, |
|
"loss": 1.044, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 1.0872210953346857, |
|
"grad_norm": 0.337890625, |
|
"learning_rate": 6.3759296822177155e-06, |
|
"loss": 1.049, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 1.0892494929006085, |
|
"grad_norm": 0.287109375, |
|
"learning_rate": 6.3691683569979724e-06, |
|
"loss": 1.0108, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 1.0912778904665315, |
|
"grad_norm": 0.234375, |
|
"learning_rate": 6.362407031778229e-06, |
|
"loss": 1.02, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 1.0933062880324544, |
|
"grad_norm": 0.26953125, |
|
"learning_rate": 6.355645706558486e-06, |
|
"loss": 1.0442, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 1.0953346855983772, |
|
"grad_norm": 0.302734375, |
|
"learning_rate": 6.348884381338742e-06, |
|
"loss": 1.0711, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.0973630831643002, |
|
"grad_norm": 0.35546875, |
|
"learning_rate": 6.342123056118999e-06, |
|
"loss": 1.0747, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 1.0993914807302232, |
|
"grad_norm": 0.234375, |
|
"learning_rate": 6.335361730899256e-06, |
|
"loss": 1.0477, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 1.101419878296146, |
|
"grad_norm": 0.2314453125, |
|
"learning_rate": 6.328600405679514e-06, |
|
"loss": 1.023, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 1.103448275862069, |
|
"grad_norm": 0.234375, |
|
"learning_rate": 6.321839080459771e-06, |
|
"loss": 1.0482, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 1.105476673427992, |
|
"grad_norm": 0.2470703125, |
|
"learning_rate": 6.315077755240028e-06, |
|
"loss": 1.1157, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 1.1075050709939147, |
|
"grad_norm": 0.2451171875, |
|
"learning_rate": 6.308316430020285e-06, |
|
"loss": 1.0663, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 1.1095334685598377, |
|
"grad_norm": 0.2294921875, |
|
"learning_rate": 6.3015551048005416e-06, |
|
"loss": 1.0461, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 1.1115618661257607, |
|
"grad_norm": 0.2412109375, |
|
"learning_rate": 6.294793779580798e-06, |
|
"loss": 1.0951, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 1.1135902636916835, |
|
"grad_norm": 0.2431640625, |
|
"learning_rate": 6.2880324543610545e-06, |
|
"loss": 1.0817, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 1.1156186612576064, |
|
"grad_norm": 0.2578125, |
|
"learning_rate": 6.281271129141312e-06, |
|
"loss": 1.0373, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.1176470588235294, |
|
"grad_norm": 0.26953125, |
|
"learning_rate": 6.274509803921569e-06, |
|
"loss": 1.0159, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 1.1196754563894524, |
|
"grad_norm": 0.263671875, |
|
"learning_rate": 6.267748478701826e-06, |
|
"loss": 1.0672, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 1.1217038539553752, |
|
"grad_norm": 0.23046875, |
|
"learning_rate": 6.260987153482083e-06, |
|
"loss": 1.0657, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 1.1237322515212982, |
|
"grad_norm": 0.2275390625, |
|
"learning_rate": 6.25422582826234e-06, |
|
"loss": 1.0105, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 1.1257606490872212, |
|
"grad_norm": 0.2412109375, |
|
"learning_rate": 6.247464503042598e-06, |
|
"loss": 1.0405, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 1.127789046653144, |
|
"grad_norm": 0.2490234375, |
|
"learning_rate": 6.240703177822853e-06, |
|
"loss": 1.0467, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 1.129817444219067, |
|
"grad_norm": 0.2255859375, |
|
"learning_rate": 6.233941852603111e-06, |
|
"loss": 1.0506, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 1.13184584178499, |
|
"grad_norm": 0.2392578125, |
|
"learning_rate": 6.227180527383368e-06, |
|
"loss": 1.0681, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 1.1338742393509127, |
|
"grad_norm": 0.2421875, |
|
"learning_rate": 6.2204192021636245e-06, |
|
"loss": 1.126, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 1.1359026369168357, |
|
"grad_norm": 0.2451171875, |
|
"learning_rate": 6.213657876943881e-06, |
|
"loss": 1.0295, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.1379310344827587, |
|
"grad_norm": 0.28515625, |
|
"learning_rate": 6.206896551724138e-06, |
|
"loss": 1.076, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 1.1399594320486814, |
|
"grad_norm": 0.240234375, |
|
"learning_rate": 6.200135226504396e-06, |
|
"loss": 1.0124, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 1.1419878296146044, |
|
"grad_norm": 0.2490234375, |
|
"learning_rate": 6.193373901284653e-06, |
|
"loss": 1.1096, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 1.1440162271805274, |
|
"grad_norm": 0.296875, |
|
"learning_rate": 6.186612576064909e-06, |
|
"loss": 1.0413, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 1.1460446247464504, |
|
"grad_norm": 0.2314453125, |
|
"learning_rate": 6.179851250845166e-06, |
|
"loss": 1.0461, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 1.1480730223123732, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 6.173089925625423e-06, |
|
"loss": 1.0517, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 1.1501014198782962, |
|
"grad_norm": 0.2451171875, |
|
"learning_rate": 6.16632860040568e-06, |
|
"loss": 1.0945, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 1.1521298174442192, |
|
"grad_norm": 0.251953125, |
|
"learning_rate": 6.159567275185937e-06, |
|
"loss": 1.1018, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 1.154158215010142, |
|
"grad_norm": 0.275390625, |
|
"learning_rate": 6.1528059499661945e-06, |
|
"loss": 0.9737, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 1.156186612576065, |
|
"grad_norm": 0.2431640625, |
|
"learning_rate": 6.146044624746451e-06, |
|
"loss": 1.0363, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.158215010141988, |
|
"grad_norm": 0.61328125, |
|
"learning_rate": 6.139283299526708e-06, |
|
"loss": 1.071, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 1.1602434077079107, |
|
"grad_norm": 0.2392578125, |
|
"learning_rate": 6.132521974306964e-06, |
|
"loss": 1.0491, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 1.1622718052738337, |
|
"grad_norm": 0.2451171875, |
|
"learning_rate": 6.125760649087221e-06, |
|
"loss": 1.0269, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 1.1643002028397567, |
|
"grad_norm": 0.2490234375, |
|
"learning_rate": 6.118999323867478e-06, |
|
"loss": 1.0467, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 1.1663286004056794, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 6.112237998647735e-06, |
|
"loss": 1.0575, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 1.1683569979716024, |
|
"grad_norm": 0.2392578125, |
|
"learning_rate": 6.105476673427993e-06, |
|
"loss": 1.0519, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 1.1703853955375254, |
|
"grad_norm": 0.259765625, |
|
"learning_rate": 6.09871534820825e-06, |
|
"loss": 1.0848, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 1.1724137931034484, |
|
"grad_norm": 0.2333984375, |
|
"learning_rate": 6.091954022988507e-06, |
|
"loss": 1.0639, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 1.1744421906693712, |
|
"grad_norm": 0.23828125, |
|
"learning_rate": 6.0851926977687636e-06, |
|
"loss": 1.0619, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 1.1764705882352942, |
|
"grad_norm": 0.2392578125, |
|
"learning_rate": 6.07843137254902e-06, |
|
"loss": 1.0422, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.178498985801217, |
|
"grad_norm": 0.2294921875, |
|
"learning_rate": 6.0716700473292766e-06, |
|
"loss": 1.0245, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 1.18052738336714, |
|
"grad_norm": 0.2255859375, |
|
"learning_rate": 6.0649087221095335e-06, |
|
"loss": 1.051, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 1.182555780933063, |
|
"grad_norm": 0.298828125, |
|
"learning_rate": 6.058147396889791e-06, |
|
"loss": 1.0893, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 1.184584178498986, |
|
"grad_norm": 0.2412109375, |
|
"learning_rate": 6.051386071670048e-06, |
|
"loss": 1.0841, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 1.1866125760649087, |
|
"grad_norm": 0.2451171875, |
|
"learning_rate": 6.044624746450305e-06, |
|
"loss": 1.0444, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 1.1886409736308317, |
|
"grad_norm": 0.2490234375, |
|
"learning_rate": 6.037863421230562e-06, |
|
"loss": 1.0222, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 1.1906693711967546, |
|
"grad_norm": 0.26171875, |
|
"learning_rate": 6.031102096010819e-06, |
|
"loss": 1.0554, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 1.1926977687626774, |
|
"grad_norm": 0.25, |
|
"learning_rate": 6.024340770791075e-06, |
|
"loss": 1.0436, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 1.1947261663286004, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 6.017579445571332e-06, |
|
"loss": 1.0239, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 1.1967545638945234, |
|
"grad_norm": 0.2373046875, |
|
"learning_rate": 6.01081812035159e-06, |
|
"loss": 1.0412, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.1987829614604462, |
|
"grad_norm": 0.2431640625, |
|
"learning_rate": 6.0040567951318465e-06, |
|
"loss": 1.0504, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 1.2008113590263692, |
|
"grad_norm": 0.236328125, |
|
"learning_rate": 5.9972954699121034e-06, |
|
"loss": 1.0352, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 1.2028397565922921, |
|
"grad_norm": 0.2353515625, |
|
"learning_rate": 5.99053414469236e-06, |
|
"loss": 1.0506, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 1.204868154158215, |
|
"grad_norm": 0.2412109375, |
|
"learning_rate": 5.983772819472617e-06, |
|
"loss": 1.0723, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 1.206896551724138, |
|
"grad_norm": 0.251953125, |
|
"learning_rate": 5.977011494252874e-06, |
|
"loss": 1.0563, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 1.208924949290061, |
|
"grad_norm": 0.244140625, |
|
"learning_rate": 5.97025016903313e-06, |
|
"loss": 1.0346, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 1.2109533468559839, |
|
"grad_norm": 0.2451171875, |
|
"learning_rate": 5.963488843813387e-06, |
|
"loss": 1.0636, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 1.2129817444219066, |
|
"grad_norm": 0.236328125, |
|
"learning_rate": 5.956727518593645e-06, |
|
"loss": 1.0694, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 1.2150101419878296, |
|
"grad_norm": 0.251953125, |
|
"learning_rate": 5.949966193373902e-06, |
|
"loss": 1.0332, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 1.2170385395537526, |
|
"grad_norm": 0.234375, |
|
"learning_rate": 5.943204868154159e-06, |
|
"loss": 1.0194, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.2190669371196754, |
|
"grad_norm": 0.37890625, |
|
"learning_rate": 5.936443542934416e-06, |
|
"loss": 1.0426, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 1.2210953346855984, |
|
"grad_norm": 0.2412109375, |
|
"learning_rate": 5.9296822177146725e-06, |
|
"loss": 1.0372, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 1.2231237322515214, |
|
"grad_norm": 0.50390625, |
|
"learning_rate": 5.92292089249493e-06, |
|
"loss": 0.9994, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 1.2251521298174441, |
|
"grad_norm": 0.263671875, |
|
"learning_rate": 5.9161595672751855e-06, |
|
"loss": 1.0621, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 1.2271805273833671, |
|
"grad_norm": 0.236328125, |
|
"learning_rate": 5.909398242055443e-06, |
|
"loss": 1.0593, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 1.2292089249492901, |
|
"grad_norm": 0.25, |
|
"learning_rate": 5.9026369168357e-06, |
|
"loss": 1.0912, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 1.231237322515213, |
|
"grad_norm": 0.23828125, |
|
"learning_rate": 5.895875591615957e-06, |
|
"loss": 1.0958, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 1.2332657200811359, |
|
"grad_norm": 0.2392578125, |
|
"learning_rate": 5.889114266396214e-06, |
|
"loss": 0.997, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 1.2352941176470589, |
|
"grad_norm": 0.248046875, |
|
"learning_rate": 5.882352941176471e-06, |
|
"loss": 1.1018, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 1.2373225152129819, |
|
"grad_norm": 0.244140625, |
|
"learning_rate": 5.875591615956729e-06, |
|
"loss": 1.0352, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.2393509127789046, |
|
"grad_norm": 0.265625, |
|
"learning_rate": 5.868830290736986e-06, |
|
"loss": 1.0638, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 1.2413793103448276, |
|
"grad_norm": 0.24609375, |
|
"learning_rate": 5.862068965517242e-06, |
|
"loss": 1.0008, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 1.2434077079107504, |
|
"grad_norm": 0.251953125, |
|
"learning_rate": 5.8553076402974986e-06, |
|
"loss": 1.0329, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 1.2454361054766734, |
|
"grad_norm": 0.2412109375, |
|
"learning_rate": 5.8485463150777555e-06, |
|
"loss": 1.0629, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 1.2474645030425964, |
|
"grad_norm": 0.240234375, |
|
"learning_rate": 5.841784989858012e-06, |
|
"loss": 1.0493, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 1.2494929006085194, |
|
"grad_norm": 0.25, |
|
"learning_rate": 5.835023664638269e-06, |
|
"loss": 1.0285, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 1.2515212981744421, |
|
"grad_norm": 0.2392578125, |
|
"learning_rate": 5.828262339418527e-06, |
|
"loss": 1.0356, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 1.2535496957403651, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 5.821501014198784e-06, |
|
"loss": 1.0757, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 1.2555780933062881, |
|
"grad_norm": 0.267578125, |
|
"learning_rate": 5.814739688979041e-06, |
|
"loss": 1.0564, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 1.2576064908722109, |
|
"grad_norm": 0.25, |
|
"learning_rate": 5.807978363759297e-06, |
|
"loss": 1.0453, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.2596348884381339, |
|
"grad_norm": 0.24609375, |
|
"learning_rate": 5.801217038539554e-06, |
|
"loss": 1.0489, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 1.2616632860040569, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 5.794455713319811e-06, |
|
"loss": 1.0171, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 1.2636916835699799, |
|
"grad_norm": 0.263671875, |
|
"learning_rate": 5.787694388100068e-06, |
|
"loss": 1.0695, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 1.2657200811359026, |
|
"grad_norm": 0.2431640625, |
|
"learning_rate": 5.7809330628803254e-06, |
|
"loss": 1.0492, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 1.2677484787018256, |
|
"grad_norm": 0.3984375, |
|
"learning_rate": 5.774171737660582e-06, |
|
"loss": 1.0291, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 1.2697768762677484, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 5.767410412440839e-06, |
|
"loss": 1.0349, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 1.2718052738336714, |
|
"grad_norm": 0.23046875, |
|
"learning_rate": 5.760649087221096e-06, |
|
"loss": 1.0465, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 1.2738336713995944, |
|
"grad_norm": 0.26171875, |
|
"learning_rate": 5.753887762001352e-06, |
|
"loss": 1.0746, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 1.2758620689655173, |
|
"grad_norm": 0.2578125, |
|
"learning_rate": 5.747126436781609e-06, |
|
"loss": 1.058, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 1.2778904665314401, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 5.740365111561866e-06, |
|
"loss": 1.0603, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.279918864097363, |
|
"grad_norm": 0.251953125, |
|
"learning_rate": 5.733603786342124e-06, |
|
"loss": 1.0767, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 1.2819472616632859, |
|
"grad_norm": 0.248046875, |
|
"learning_rate": 5.726842461122381e-06, |
|
"loss": 1.0272, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 1.2839756592292089, |
|
"grad_norm": 0.2373046875, |
|
"learning_rate": 5.720081135902638e-06, |
|
"loss": 1.0608, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 1.2860040567951319, |
|
"grad_norm": 0.2421875, |
|
"learning_rate": 5.7133198106828946e-06, |
|
"loss": 1.0151, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 1.2880324543610548, |
|
"grad_norm": 0.287109375, |
|
"learning_rate": 5.7065584854631515e-06, |
|
"loss": 1.0519, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 1.2900608519269776, |
|
"grad_norm": 0.23828125, |
|
"learning_rate": 5.6997971602434075e-06, |
|
"loss": 1.0587, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 1.2920892494929006, |
|
"grad_norm": 0.2353515625, |
|
"learning_rate": 5.6930358350236644e-06, |
|
"loss": 1.0706, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 1.2941176470588236, |
|
"grad_norm": 0.2314453125, |
|
"learning_rate": 5.686274509803922e-06, |
|
"loss": 1.0084, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 1.2961460446247464, |
|
"grad_norm": 0.25, |
|
"learning_rate": 5.679513184584179e-06, |
|
"loss": 1.0433, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 1.2981744421906694, |
|
"grad_norm": 0.2353515625, |
|
"learning_rate": 5.672751859364436e-06, |
|
"loss": 1.0044, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.3002028397565923, |
|
"grad_norm": 0.248046875, |
|
"learning_rate": 5.665990534144693e-06, |
|
"loss": 0.9463, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 1.3022312373225153, |
|
"grad_norm": 0.23828125, |
|
"learning_rate": 5.65922920892495e-06, |
|
"loss": 1.0042, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 1.304259634888438, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 5.652467883705207e-06, |
|
"loss": 1.0487, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 1.306288032454361, |
|
"grad_norm": 0.2412109375, |
|
"learning_rate": 5.645706558485463e-06, |
|
"loss": 1.0599, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 1.3083164300202839, |
|
"grad_norm": 0.240234375, |
|
"learning_rate": 5.638945233265721e-06, |
|
"loss": 1.0294, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 1.3103448275862069, |
|
"grad_norm": 0.24609375, |
|
"learning_rate": 5.6321839080459775e-06, |
|
"loss": 1.0401, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 1.3123732251521298, |
|
"grad_norm": 0.248046875, |
|
"learning_rate": 5.625422582826234e-06, |
|
"loss": 1.0143, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 1.3144016227180528, |
|
"grad_norm": 0.2314453125, |
|
"learning_rate": 5.618661257606491e-06, |
|
"loss": 1.0506, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 1.3164300202839756, |
|
"grad_norm": 0.23828125, |
|
"learning_rate": 5.611899932386748e-06, |
|
"loss": 1.0314, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 1.3184584178498986, |
|
"grad_norm": 0.2333984375, |
|
"learning_rate": 5.605138607167005e-06, |
|
"loss": 1.0434, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.3204868154158216, |
|
"grad_norm": 0.2890625, |
|
"learning_rate": 5.598377281947263e-06, |
|
"loss": 1.0598, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 1.3225152129817443, |
|
"grad_norm": 0.28515625, |
|
"learning_rate": 5.591615956727519e-06, |
|
"loss": 1.011, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 1.3245436105476673, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 5.584854631507776e-06, |
|
"loss": 1.0506, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 1.3265720081135903, |
|
"grad_norm": 0.24609375, |
|
"learning_rate": 5.578093306288033e-06, |
|
"loss": 1.0387, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 1.3286004056795133, |
|
"grad_norm": 0.2490234375, |
|
"learning_rate": 5.57133198106829e-06, |
|
"loss": 1.0571, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 1.330628803245436, |
|
"grad_norm": 0.26953125, |
|
"learning_rate": 5.564570655848547e-06, |
|
"loss": 1.0351, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 1.332657200811359, |
|
"grad_norm": 0.2431640625, |
|
"learning_rate": 5.5578093306288035e-06, |
|
"loss": 1.0454, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 1.3346855983772818, |
|
"grad_norm": 0.248046875, |
|
"learning_rate": 5.551048005409061e-06, |
|
"loss": 1.0374, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 1.3367139959432048, |
|
"grad_norm": 0.279296875, |
|
"learning_rate": 5.544286680189318e-06, |
|
"loss": 1.0395, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 1.3387423935091278, |
|
"grad_norm": 0.2490234375, |
|
"learning_rate": 5.537525354969574e-06, |
|
"loss": 0.989, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.3407707910750508, |
|
"grad_norm": 0.267578125, |
|
"learning_rate": 5.530764029749831e-06, |
|
"loss": 1.0449, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 1.3427991886409736, |
|
"grad_norm": 0.23828125, |
|
"learning_rate": 5.524002704530088e-06, |
|
"loss": 0.9985, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 1.3448275862068966, |
|
"grad_norm": 0.275390625, |
|
"learning_rate": 5.517241379310345e-06, |
|
"loss": 1.0594, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 1.3468559837728193, |
|
"grad_norm": 0.2353515625, |
|
"learning_rate": 5.510480054090602e-06, |
|
"loss": 1.0711, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 1.3488843813387423, |
|
"grad_norm": 0.251953125, |
|
"learning_rate": 5.50371872887086e-06, |
|
"loss": 1.0372, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 1.3509127789046653, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 5.4969574036511166e-06, |
|
"loss": 1.0782, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 1.3529411764705883, |
|
"grad_norm": 0.26171875, |
|
"learning_rate": 5.4901960784313735e-06, |
|
"loss": 1.0556, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 1.354969574036511, |
|
"grad_norm": 0.30078125, |
|
"learning_rate": 5.4834347532116295e-06, |
|
"loss": 1.0984, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 1.356997971602434, |
|
"grad_norm": 0.2431640625, |
|
"learning_rate": 5.4766734279918865e-06, |
|
"loss": 0.9965, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 1.359026369168357, |
|
"grad_norm": 0.279296875, |
|
"learning_rate": 5.469912102772143e-06, |
|
"loss": 1.0513, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.3610547667342798, |
|
"grad_norm": 0.259765625, |
|
"learning_rate": 5.4631507775524e-06, |
|
"loss": 1.0406, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 1.3630831643002028, |
|
"grad_norm": 0.2431640625, |
|
"learning_rate": 5.456389452332658e-06, |
|
"loss": 1.0864, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 1.3651115618661258, |
|
"grad_norm": 0.275390625, |
|
"learning_rate": 5.449628127112915e-06, |
|
"loss": 1.0379, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 1.3671399594320488, |
|
"grad_norm": 0.51171875, |
|
"learning_rate": 5.442866801893172e-06, |
|
"loss": 1.0279, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 1.3691683569979716, |
|
"grad_norm": 0.796875, |
|
"learning_rate": 5.436105476673429e-06, |
|
"loss": 1.0537, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 1.3711967545638946, |
|
"grad_norm": 0.248046875, |
|
"learning_rate": 5.429344151453685e-06, |
|
"loss": 1.022, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 1.3732251521298173, |
|
"grad_norm": 0.271484375, |
|
"learning_rate": 5.422582826233942e-06, |
|
"loss": 1.0154, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 1.3752535496957403, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 5.415821501014199e-06, |
|
"loss": 1.0449, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 1.3772819472616633, |
|
"grad_norm": 0.24609375, |
|
"learning_rate": 5.409060175794456e-06, |
|
"loss": 1.0143, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 1.3793103448275863, |
|
"grad_norm": 0.2314453125, |
|
"learning_rate": 5.402298850574713e-06, |
|
"loss": 1.047, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.381338742393509, |
|
"grad_norm": 0.2431640625, |
|
"learning_rate": 5.39553752535497e-06, |
|
"loss": 1.036, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 1.383367139959432, |
|
"grad_norm": 0.236328125, |
|
"learning_rate": 5.388776200135227e-06, |
|
"loss": 1.076, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 1.385395537525355, |
|
"grad_norm": 0.2392578125, |
|
"learning_rate": 5.382014874915484e-06, |
|
"loss": 1.0543, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 1.3874239350912778, |
|
"grad_norm": 0.28125, |
|
"learning_rate": 5.37525354969574e-06, |
|
"loss": 1.0474, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 1.3894523326572008, |
|
"grad_norm": 0.283203125, |
|
"learning_rate": 5.368492224475997e-06, |
|
"loss": 1.0287, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 1.3914807302231238, |
|
"grad_norm": 0.36328125, |
|
"learning_rate": 5.361730899256255e-06, |
|
"loss": 1.0412, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 1.3935091277890468, |
|
"grad_norm": 0.26171875, |
|
"learning_rate": 5.354969574036512e-06, |
|
"loss": 1.1062, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 1.3955375253549696, |
|
"grad_norm": 0.23046875, |
|
"learning_rate": 5.348208248816769e-06, |
|
"loss": 1.0338, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 1.3975659229208925, |
|
"grad_norm": 0.251953125, |
|
"learning_rate": 5.3414469235970255e-06, |
|
"loss": 1.0185, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 1.3995943204868153, |
|
"grad_norm": 0.251953125, |
|
"learning_rate": 5.3346855983772824e-06, |
|
"loss": 1.0041, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.4016227180527383, |
|
"grad_norm": 0.263671875, |
|
"learning_rate": 5.32792427315754e-06, |
|
"loss": 1.0337, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 1.4036511156186613, |
|
"grad_norm": 0.2470703125, |
|
"learning_rate": 5.321162947937795e-06, |
|
"loss": 1.0103, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 1.4056795131845843, |
|
"grad_norm": 0.302734375, |
|
"learning_rate": 5.314401622718053e-06, |
|
"loss": 1.0466, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 1.407707910750507, |
|
"grad_norm": 0.27734375, |
|
"learning_rate": 5.30764029749831e-06, |
|
"loss": 1.0092, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 1.40973630831643, |
|
"grad_norm": 0.25, |
|
"learning_rate": 5.300878972278567e-06, |
|
"loss": 1.0347, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 1.4117647058823528, |
|
"grad_norm": 0.24609375, |
|
"learning_rate": 5.294117647058824e-06, |
|
"loss": 1.0402, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 1.4137931034482758, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 5.287356321839081e-06, |
|
"loss": 1.0706, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 1.4158215010141988, |
|
"grad_norm": 0.248046875, |
|
"learning_rate": 5.280594996619339e-06, |
|
"loss": 1.0361, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 1.4178498985801218, |
|
"grad_norm": 0.251953125, |
|
"learning_rate": 5.2738336713995955e-06, |
|
"loss": 1.0189, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 1.4198782961460445, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 5.2670723461798516e-06, |
|
"loss": 1.0324, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.4219066937119675, |
|
"grad_norm": 0.248046875, |
|
"learning_rate": 5.2603110209601085e-06, |
|
"loss": 1.0408, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 1.4239350912778905, |
|
"grad_norm": 0.271484375, |
|
"learning_rate": 5.253549695740365e-06, |
|
"loss": 1.0557, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 1.4259634888438133, |
|
"grad_norm": 0.2578125, |
|
"learning_rate": 5.246788370520622e-06, |
|
"loss": 1.0519, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 1.4279918864097363, |
|
"grad_norm": 0.365234375, |
|
"learning_rate": 5.240027045300879e-06, |
|
"loss": 0.9978, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 1.4300202839756593, |
|
"grad_norm": 0.2578125, |
|
"learning_rate": 5.233265720081136e-06, |
|
"loss": 0.9964, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 1.4320486815415823, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 5.226504394861394e-06, |
|
"loss": 1.0495, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 1.434077079107505, |
|
"grad_norm": 0.259765625, |
|
"learning_rate": 5.219743069641651e-06, |
|
"loss": 1.0334, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 1.436105476673428, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 5.212981744421907e-06, |
|
"loss": 1.0414, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 1.4381338742393508, |
|
"grad_norm": 0.275390625, |
|
"learning_rate": 5.206220419202164e-06, |
|
"loss": 1.0865, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 1.4401622718052738, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 5.199459093982421e-06, |
|
"loss": 1.0293, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.4421906693711968, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 5.192697768762678e-06, |
|
"loss": 1.0371, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 1.4442190669371198, |
|
"grad_norm": 0.267578125, |
|
"learning_rate": 5.1859364435429345e-06, |
|
"loss": 1.0549, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 1.4462474645030425, |
|
"grad_norm": 0.2490234375, |
|
"learning_rate": 5.179175118323192e-06, |
|
"loss": 1.0262, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 1.4482758620689655, |
|
"grad_norm": 0.240234375, |
|
"learning_rate": 5.172413793103449e-06, |
|
"loss": 1.0362, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 1.4503042596348885, |
|
"grad_norm": 0.259765625, |
|
"learning_rate": 5.165652467883706e-06, |
|
"loss": 1.0823, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 1.4523326572008113, |
|
"grad_norm": 0.2333984375, |
|
"learning_rate": 5.158891142663962e-06, |
|
"loss": 1.0068, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 1.4543610547667343, |
|
"grad_norm": 0.263671875, |
|
"learning_rate": 5.152129817444219e-06, |
|
"loss": 1.0176, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 1.4563894523326573, |
|
"grad_norm": 0.27734375, |
|
"learning_rate": 5.145368492224476e-06, |
|
"loss": 1.0531, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 1.4584178498985803, |
|
"grad_norm": 0.2470703125, |
|
"learning_rate": 5.138607167004733e-06, |
|
"loss": 1.003, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 1.460446247464503, |
|
"grad_norm": 0.37109375, |
|
"learning_rate": 5.131845841784991e-06, |
|
"loss": 1.097, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.462474645030426, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 5.1250845165652475e-06, |
|
"loss": 1.0326, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 1.4645030425963488, |
|
"grad_norm": 0.259765625, |
|
"learning_rate": 5.1183231913455045e-06, |
|
"loss": 1.0651, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 1.4665314401622718, |
|
"grad_norm": 0.234375, |
|
"learning_rate": 5.111561866125761e-06, |
|
"loss": 1.0116, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 1.4685598377281948, |
|
"grad_norm": 0.2734375, |
|
"learning_rate": 5.1048005409060174e-06, |
|
"loss": 1.0215, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 1.4705882352941178, |
|
"grad_norm": 0.2451171875, |
|
"learning_rate": 5.098039215686274e-06, |
|
"loss": 1.0207, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 1.4726166328600405, |
|
"grad_norm": 0.26171875, |
|
"learning_rate": 5.091277890466531e-06, |
|
"loss": 1.0264, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 1.4746450304259635, |
|
"grad_norm": 0.251953125, |
|
"learning_rate": 5.084516565246789e-06, |
|
"loss": 1.0648, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 1.4766734279918863, |
|
"grad_norm": 0.275390625, |
|
"learning_rate": 5.077755240027046e-06, |
|
"loss": 1.0485, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 1.4787018255578093, |
|
"grad_norm": 0.2490234375, |
|
"learning_rate": 5.070993914807303e-06, |
|
"loss": 1.0618, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 1.4807302231237323, |
|
"grad_norm": 0.28125, |
|
"learning_rate": 5.06423258958756e-06, |
|
"loss": 1.0572, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.4827586206896552, |
|
"grad_norm": 0.259765625, |
|
"learning_rate": 5.057471264367817e-06, |
|
"loss": 1.0321, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 1.484787018255578, |
|
"grad_norm": 0.259765625, |
|
"learning_rate": 5.050709939148073e-06, |
|
"loss": 1.0498, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 1.486815415821501, |
|
"grad_norm": 0.5, |
|
"learning_rate": 5.04394861392833e-06, |
|
"loss": 1.0393, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 1.488843813387424, |
|
"grad_norm": 0.24609375, |
|
"learning_rate": 5.037187288708587e-06, |
|
"loss": 1.047, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 1.4908722109533468, |
|
"grad_norm": 0.267578125, |
|
"learning_rate": 5.030425963488844e-06, |
|
"loss": 1.128, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 1.4929006085192698, |
|
"grad_norm": 0.24609375, |
|
"learning_rate": 5.023664638269101e-06, |
|
"loss": 1.0581, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 1.4949290060851927, |
|
"grad_norm": 0.2294921875, |
|
"learning_rate": 5.016903313049358e-06, |
|
"loss": 0.9735, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 1.4969574036511157, |
|
"grad_norm": 0.333984375, |
|
"learning_rate": 5.010141987829615e-06, |
|
"loss": 1.0535, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 1.4989858012170385, |
|
"grad_norm": 0.2392578125, |
|
"learning_rate": 5.003380662609873e-06, |
|
"loss": 1.0524, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 1.5010141987829615, |
|
"grad_norm": 0.2392578125, |
|
"learning_rate": 4.996619337390129e-06, |
|
"loss": 1.0575, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.5030425963488843, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 4.989858012170386e-06, |
|
"loss": 1.008, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 1.5050709939148073, |
|
"grad_norm": 0.244140625, |
|
"learning_rate": 4.983096686950643e-06, |
|
"loss": 1.0194, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 1.5070993914807302, |
|
"grad_norm": 0.2451171875, |
|
"learning_rate": 4.9763353617309e-06, |
|
"loss": 1.0579, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 1.5091277890466532, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 4.9695740365111565e-06, |
|
"loss": 1.0465, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 1.5111561866125762, |
|
"grad_norm": 0.2431640625, |
|
"learning_rate": 4.962812711291413e-06, |
|
"loss": 1.044, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 1.513184584178499, |
|
"grad_norm": 0.248046875, |
|
"learning_rate": 4.95605138607167e-06, |
|
"loss": 1.048, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 1.5152129817444218, |
|
"grad_norm": 0.2412109375, |
|
"learning_rate": 4.949290060851927e-06, |
|
"loss": 1.0386, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 1.5172413793103448, |
|
"grad_norm": 0.267578125, |
|
"learning_rate": 4.942528735632184e-06, |
|
"loss": 1.0929, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 1.5192697768762677, |
|
"grad_norm": 0.337890625, |
|
"learning_rate": 4.935767410412441e-06, |
|
"loss": 1.0158, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 1.5212981744421907, |
|
"grad_norm": 0.248046875, |
|
"learning_rate": 4.929006085192698e-06, |
|
"loss": 1.0559, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.5233265720081137, |
|
"grad_norm": 0.263671875, |
|
"learning_rate": 4.922244759972955e-06, |
|
"loss": 1.0256, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 1.5253549695740365, |
|
"grad_norm": 0.294921875, |
|
"learning_rate": 4.915483434753212e-06, |
|
"loss": 1.0062, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 1.5273833671399595, |
|
"grad_norm": 0.251953125, |
|
"learning_rate": 4.9087221095334696e-06, |
|
"loss": 1.0676, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 1.5294117647058822, |
|
"grad_norm": 0.2392578125, |
|
"learning_rate": 4.901960784313726e-06, |
|
"loss": 1.0193, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 1.5314401622718052, |
|
"grad_norm": 0.2421875, |
|
"learning_rate": 4.8951994590939825e-06, |
|
"loss": 1.0317, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 1.5334685598377282, |
|
"grad_norm": 0.2353515625, |
|
"learning_rate": 4.8884381338742394e-06, |
|
"loss": 1.0093, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 1.5354969574036512, |
|
"grad_norm": 0.283203125, |
|
"learning_rate": 4.881676808654497e-06, |
|
"loss": 1.0976, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 1.537525354969574, |
|
"grad_norm": 0.236328125, |
|
"learning_rate": 4.874915483434753e-06, |
|
"loss": 1.0135, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 1.539553752535497, |
|
"grad_norm": 0.2421875, |
|
"learning_rate": 4.86815415821501e-06, |
|
"loss": 1.02, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 1.5415821501014197, |
|
"grad_norm": 0.259765625, |
|
"learning_rate": 4.861392832995268e-06, |
|
"loss": 1.0506, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.5436105476673427, |
|
"grad_norm": 0.28515625, |
|
"learning_rate": 4.854631507775525e-06, |
|
"loss": 1.0188, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 1.5456389452332657, |
|
"grad_norm": 0.26171875, |
|
"learning_rate": 4.847870182555781e-06, |
|
"loss": 1.0333, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 1.5476673427991887, |
|
"grad_norm": 0.248046875, |
|
"learning_rate": 4.841108857336038e-06, |
|
"loss": 1.0798, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 1.5496957403651117, |
|
"grad_norm": 0.240234375, |
|
"learning_rate": 4.834347532116296e-06, |
|
"loss": 1.01, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 1.5517241379310345, |
|
"grad_norm": 0.333984375, |
|
"learning_rate": 4.8275862068965525e-06, |
|
"loss": 1.081, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 1.5537525354969572, |
|
"grad_norm": 0.248046875, |
|
"learning_rate": 4.8208248816768086e-06, |
|
"loss": 1.0455, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 1.5557809330628802, |
|
"grad_norm": 0.240234375, |
|
"learning_rate": 4.814063556457066e-06, |
|
"loss": 1.0435, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 1.5578093306288032, |
|
"grad_norm": 0.369140625, |
|
"learning_rate": 4.807302231237323e-06, |
|
"loss": 1.0402, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 1.5598377281947262, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 4.80054090601758e-06, |
|
"loss": 1.0597, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 1.5618661257606492, |
|
"grad_norm": 0.248046875, |
|
"learning_rate": 4.793779580797836e-06, |
|
"loss": 1.0579, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.563894523326572, |
|
"grad_norm": 0.251953125, |
|
"learning_rate": 4.787018255578094e-06, |
|
"loss": 1.0255, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 1.565922920892495, |
|
"grad_norm": 0.2470703125, |
|
"learning_rate": 4.780256930358351e-06, |
|
"loss": 1.0427, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 1.5679513184584177, |
|
"grad_norm": 0.24609375, |
|
"learning_rate": 4.773495605138608e-06, |
|
"loss": 1.0013, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 1.5699797160243407, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 4.766734279918865e-06, |
|
"loss": 1.0138, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 1.5720081135902637, |
|
"grad_norm": 0.275390625, |
|
"learning_rate": 4.759972954699122e-06, |
|
"loss": 1.0689, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 1.5740365111561867, |
|
"grad_norm": 0.2392578125, |
|
"learning_rate": 4.7532116294793785e-06, |
|
"loss": 1.0476, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 1.5760649087221097, |
|
"grad_norm": 0.2578125, |
|
"learning_rate": 4.7464503042596354e-06, |
|
"loss": 1.0279, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 1.5780933062880325, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 4.739688979039892e-06, |
|
"loss": 1.0612, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 1.5801217038539552, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 4.732927653820149e-06, |
|
"loss": 1.0301, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 1.5821501014198782, |
|
"grad_norm": 0.302734375, |
|
"learning_rate": 4.726166328600406e-06, |
|
"loss": 1.0758, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.5841784989858012, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 4.719405003380663e-06, |
|
"loss": 1.0069, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 1.5862068965517242, |
|
"grad_norm": 0.271484375, |
|
"learning_rate": 4.71264367816092e-06, |
|
"loss": 1.0242, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 1.5882352941176472, |
|
"grad_norm": 0.265625, |
|
"learning_rate": 4.705882352941177e-06, |
|
"loss": 1.109, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 1.59026369168357, |
|
"grad_norm": 0.259765625, |
|
"learning_rate": 4.699121027721434e-06, |
|
"loss": 0.9922, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 1.592292089249493, |
|
"grad_norm": 0.267578125, |
|
"learning_rate": 4.692359702501691e-06, |
|
"loss": 1.0403, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 1.5943204868154157, |
|
"grad_norm": 0.26953125, |
|
"learning_rate": 4.685598377281948e-06, |
|
"loss": 0.9909, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 1.5963488843813387, |
|
"grad_norm": 0.267578125, |
|
"learning_rate": 4.6788370520622046e-06, |
|
"loss": 1.0451, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 1.5983772819472617, |
|
"grad_norm": 0.2431640625, |
|
"learning_rate": 4.6720757268424615e-06, |
|
"loss": 1.0213, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 1.6004056795131847, |
|
"grad_norm": 0.236328125, |
|
"learning_rate": 4.665314401622718e-06, |
|
"loss": 0.9739, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 1.6024340770791075, |
|
"grad_norm": 0.271484375, |
|
"learning_rate": 4.658553076402975e-06, |
|
"loss": 1.0517, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.6044624746450304, |
|
"grad_norm": 0.26171875, |
|
"learning_rate": 4.651791751183232e-06, |
|
"loss": 1.0491, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 1.6064908722109532, |
|
"grad_norm": 0.2490234375, |
|
"learning_rate": 4.645030425963489e-06, |
|
"loss": 1.0499, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 1.6085192697768762, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 4.638269100743746e-06, |
|
"loss": 1.0303, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 1.6105476673427992, |
|
"grad_norm": 0.2734375, |
|
"learning_rate": 4.631507775524003e-06, |
|
"loss": 1.0469, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 1.6125760649087222, |
|
"grad_norm": 0.2734375, |
|
"learning_rate": 4.62474645030426e-06, |
|
"loss": 1.0545, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 1.6146044624746452, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 4.617985125084517e-06, |
|
"loss": 1.0506, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 1.616632860040568, |
|
"grad_norm": 0.3984375, |
|
"learning_rate": 4.611223799864774e-06, |
|
"loss": 1.0621, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 1.6186612576064907, |
|
"grad_norm": 0.373046875, |
|
"learning_rate": 4.604462474645031e-06, |
|
"loss": 1.0528, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 1.6206896551724137, |
|
"grad_norm": 0.265625, |
|
"learning_rate": 4.5977011494252875e-06, |
|
"loss": 1.0781, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 1.6227180527383367, |
|
"grad_norm": 0.27734375, |
|
"learning_rate": 4.590939824205544e-06, |
|
"loss": 1.053, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.6247464503042597, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 4.584178498985802e-06, |
|
"loss": 1.0392, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 1.6267748478701827, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 4.577417173766058e-06, |
|
"loss": 1.0657, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 1.6288032454361054, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 4.570655848546315e-06, |
|
"loss": 1.0228, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 1.6308316430020284, |
|
"grad_norm": 0.265625, |
|
"learning_rate": 4.563894523326572e-06, |
|
"loss": 1.0226, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 1.6328600405679512, |
|
"grad_norm": 0.259765625, |
|
"learning_rate": 4.55713319810683e-06, |
|
"loss": 1.0523, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 1.6348884381338742, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 4.550371872887086e-06, |
|
"loss": 1.011, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 1.6369168356997972, |
|
"grad_norm": 0.2578125, |
|
"learning_rate": 4.543610547667343e-06, |
|
"loss": 1.0638, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 1.6389452332657202, |
|
"grad_norm": 0.26953125, |
|
"learning_rate": 4.5368492224476005e-06, |
|
"loss": 1.0666, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 1.6409736308316432, |
|
"grad_norm": 0.2578125, |
|
"learning_rate": 4.5300878972278575e-06, |
|
"loss": 1.0697, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 1.643002028397566, |
|
"grad_norm": 0.25, |
|
"learning_rate": 4.5233265720081135e-06, |
|
"loss": 1.0536, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.6450304259634887, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 4.5165652467883704e-06, |
|
"loss": 1.0281, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 1.6470588235294117, |
|
"grad_norm": 0.3984375, |
|
"learning_rate": 4.509803921568628e-06, |
|
"loss": 1.0882, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 1.6490872210953347, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 4.503042596348885e-06, |
|
"loss": 1.0889, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 1.6511156186612577, |
|
"grad_norm": 0.357421875, |
|
"learning_rate": 4.496281271129141e-06, |
|
"loss": 1.0441, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 1.6531440162271807, |
|
"grad_norm": 0.25, |
|
"learning_rate": 4.489519945909399e-06, |
|
"loss": 1.075, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 1.6551724137931034, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 4.482758620689656e-06, |
|
"loss": 1.0429, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 1.6572008113590264, |
|
"grad_norm": 0.2490234375, |
|
"learning_rate": 4.475997295469913e-06, |
|
"loss": 1.0555, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 1.6592292089249492, |
|
"grad_norm": 0.279296875, |
|
"learning_rate": 4.469235970250169e-06, |
|
"loss": 1.0478, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 1.6612576064908722, |
|
"grad_norm": 0.265625, |
|
"learning_rate": 4.4624746450304266e-06, |
|
"loss": 1.0178, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 1.6632860040567952, |
|
"grad_norm": 0.28125, |
|
"learning_rate": 4.4557133198106835e-06, |
|
"loss": 1.009, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.6653144016227182, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 4.44895199459094e-06, |
|
"loss": 1.008, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 1.667342799188641, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 4.442190669371197e-06, |
|
"loss": 1.0553, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 1.669371196754564, |
|
"grad_norm": 0.263671875, |
|
"learning_rate": 4.435429344151454e-06, |
|
"loss": 1.0913, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 1.6713995943204867, |
|
"grad_norm": 0.26953125, |
|
"learning_rate": 4.428668018931711e-06, |
|
"loss": 1.0635, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 1.6734279918864097, |
|
"grad_norm": 0.2451171875, |
|
"learning_rate": 4.421906693711968e-06, |
|
"loss": 1.0251, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 1.6754563894523327, |
|
"grad_norm": 0.2373046875, |
|
"learning_rate": 4.415145368492225e-06, |
|
"loss": 1.0472, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 1.6774847870182557, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 4.408384043272482e-06, |
|
"loss": 1.0504, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 1.6795131845841786, |
|
"grad_norm": 0.244140625, |
|
"learning_rate": 4.401622718052739e-06, |
|
"loss": 1.0581, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 1.6815415821501014, |
|
"grad_norm": 0.244140625, |
|
"learning_rate": 4.394861392832996e-06, |
|
"loss": 1.0142, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 1.6835699797160242, |
|
"grad_norm": 0.2451171875, |
|
"learning_rate": 4.388100067613253e-06, |
|
"loss": 1.0157, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.6855983772819472, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 4.3813387423935095e-06, |
|
"loss": 1.0668, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 1.6876267748478702, |
|
"grad_norm": 0.296875, |
|
"learning_rate": 4.374577417173766e-06, |
|
"loss": 1.009, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 1.6896551724137931, |
|
"grad_norm": 0.2412109375, |
|
"learning_rate": 4.367816091954023e-06, |
|
"loss": 1.0159, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 1.6916835699797161, |
|
"grad_norm": 0.2578125, |
|
"learning_rate": 4.36105476673428e-06, |
|
"loss": 1.0275, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 1.693711967545639, |
|
"grad_norm": 0.240234375, |
|
"learning_rate": 4.354293441514537e-06, |
|
"loss": 0.9794, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 1.695740365111562, |
|
"grad_norm": 0.2578125, |
|
"learning_rate": 4.347532116294794e-06, |
|
"loss": 1.0311, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 1.6977687626774847, |
|
"grad_norm": 0.2431640625, |
|
"learning_rate": 4.340770791075051e-06, |
|
"loss": 1.0232, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 1.6997971602434077, |
|
"grad_norm": 0.265625, |
|
"learning_rate": 4.334009465855308e-06, |
|
"loss": 1.0875, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 1.7018255578093306, |
|
"grad_norm": 0.2451171875, |
|
"learning_rate": 4.327248140635565e-06, |
|
"loss": 1.0545, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 1.7038539553752536, |
|
"grad_norm": 0.2578125, |
|
"learning_rate": 4.320486815415822e-06, |
|
"loss": 1.1009, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.7058823529411766, |
|
"grad_norm": 0.240234375, |
|
"learning_rate": 4.313725490196079e-06, |
|
"loss": 1.0431, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 1.7079107505070994, |
|
"grad_norm": 0.30078125, |
|
"learning_rate": 4.3069641649763355e-06, |
|
"loss": 1.0407, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 1.7099391480730222, |
|
"grad_norm": 0.265625, |
|
"learning_rate": 4.3002028397565924e-06, |
|
"loss": 1.0143, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 1.7119675456389452, |
|
"grad_norm": 0.287109375, |
|
"learning_rate": 4.293441514536849e-06, |
|
"loss": 1.0071, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 1.7139959432048681, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 4.286680189317107e-06, |
|
"loss": 1.0152, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 1.7160243407707911, |
|
"grad_norm": 0.25, |
|
"learning_rate": 4.279918864097363e-06, |
|
"loss": 1.0049, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 1.7180527383367141, |
|
"grad_norm": 0.251953125, |
|
"learning_rate": 4.27315753887762e-06, |
|
"loss": 1.048, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 1.720081135902637, |
|
"grad_norm": 0.265625, |
|
"learning_rate": 4.266396213657877e-06, |
|
"loss": 1.0403, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 1.7221095334685599, |
|
"grad_norm": 0.2431640625, |
|
"learning_rate": 4.259634888438135e-06, |
|
"loss": 1.0488, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 1.7241379310344827, |
|
"grad_norm": 0.2451171875, |
|
"learning_rate": 4.252873563218391e-06, |
|
"loss": 1.063, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.7261663286004056, |
|
"grad_norm": 0.265625, |
|
"learning_rate": 4.246112237998648e-06, |
|
"loss": 1.0591, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 1.7281947261663286, |
|
"grad_norm": 0.234375, |
|
"learning_rate": 4.2393509127789055e-06, |
|
"loss": 1.0191, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 1.7302231237322516, |
|
"grad_norm": 0.275390625, |
|
"learning_rate": 4.232589587559162e-06, |
|
"loss": 1.0538, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 1.7322515212981744, |
|
"grad_norm": 0.349609375, |
|
"learning_rate": 4.2258282623394185e-06, |
|
"loss": 1.07, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 1.7342799188640974, |
|
"grad_norm": 0.24609375, |
|
"learning_rate": 4.219066937119675e-06, |
|
"loss": 1.0387, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 1.7363083164300201, |
|
"grad_norm": 0.24609375, |
|
"learning_rate": 4.212305611899933e-06, |
|
"loss": 1.0549, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 1.7383367139959431, |
|
"grad_norm": 0.3671875, |
|
"learning_rate": 4.20554428668019e-06, |
|
"loss": 1.0035, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 1.7403651115618661, |
|
"grad_norm": 0.2490234375, |
|
"learning_rate": 4.198782961460446e-06, |
|
"loss": 1.0304, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 1.7423935091277891, |
|
"grad_norm": 0.251953125, |
|
"learning_rate": 4.192021636240704e-06, |
|
"loss": 1.0472, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 1.744421906693712, |
|
"grad_norm": 0.291015625, |
|
"learning_rate": 4.185260311020961e-06, |
|
"loss": 1.0034, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.7464503042596349, |
|
"grad_norm": 0.271484375, |
|
"learning_rate": 4.178498985801218e-06, |
|
"loss": 1.0649, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 1.7484787018255576, |
|
"grad_norm": 0.2431640625, |
|
"learning_rate": 4.171737660581474e-06, |
|
"loss": 1.0257, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 1.7505070993914806, |
|
"grad_norm": 0.37109375, |
|
"learning_rate": 4.1649763353617315e-06, |
|
"loss": 1.0053, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 1.7525354969574036, |
|
"grad_norm": 0.2470703125, |
|
"learning_rate": 4.1582150101419884e-06, |
|
"loss": 1.0393, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 1.7545638945233266, |
|
"grad_norm": 0.26171875, |
|
"learning_rate": 4.151453684922245e-06, |
|
"loss": 1.0857, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 1.7565922920892496, |
|
"grad_norm": 0.263671875, |
|
"learning_rate": 4.144692359702502e-06, |
|
"loss": 1.0793, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 1.7586206896551724, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 4.137931034482759e-06, |
|
"loss": 1.0013, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 1.7606490872210954, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 4.131169709263016e-06, |
|
"loss": 1.0325, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 1.7626774847870181, |
|
"grad_norm": 0.2470703125, |
|
"learning_rate": 4.124408384043273e-06, |
|
"loss": 1.0407, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 1.7647058823529411, |
|
"grad_norm": 0.2421875, |
|
"learning_rate": 4.11764705882353e-06, |
|
"loss": 1.0443, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.7667342799188641, |
|
"grad_norm": 0.298828125, |
|
"learning_rate": 4.110885733603787e-06, |
|
"loss": 1.0305, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 1.768762677484787, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 4.104124408384044e-06, |
|
"loss": 1.0855, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 1.77079107505071, |
|
"grad_norm": 0.24609375, |
|
"learning_rate": 4.097363083164301e-06, |
|
"loss": 1.0202, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 1.7728194726166329, |
|
"grad_norm": 0.2431640625, |
|
"learning_rate": 4.0906017579445575e-06, |
|
"loss": 1.0145, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 1.7748478701825556, |
|
"grad_norm": 0.2578125, |
|
"learning_rate": 4.0838404327248145e-06, |
|
"loss": 1.0533, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 1.7768762677484786, |
|
"grad_norm": 0.40234375, |
|
"learning_rate": 4.077079107505071e-06, |
|
"loss": 1.0158, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 1.7789046653144016, |
|
"grad_norm": 0.263671875, |
|
"learning_rate": 4.070317782285328e-06, |
|
"loss": 0.9991, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 1.7809330628803246, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 4.063556457065585e-06, |
|
"loss": 1.0217, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 1.7829614604462476, |
|
"grad_norm": 0.24609375, |
|
"learning_rate": 4.056795131845842e-06, |
|
"loss": 1.0227, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 1.7849898580121704, |
|
"grad_norm": 0.29296875, |
|
"learning_rate": 4.050033806626099e-06, |
|
"loss": 1.0078, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.7870182555780934, |
|
"grad_norm": 0.271484375, |
|
"learning_rate": 4.043272481406356e-06, |
|
"loss": 1.0662, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 1.7890466531440161, |
|
"grad_norm": 0.263671875, |
|
"learning_rate": 4.036511156186613e-06, |
|
"loss": 1.0436, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 1.791075050709939, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 4.02974983096687e-06, |
|
"loss": 1.0535, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 1.793103448275862, |
|
"grad_norm": 0.265625, |
|
"learning_rate": 4.022988505747127e-06, |
|
"loss": 1.0301, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 1.795131845841785, |
|
"grad_norm": 0.25, |
|
"learning_rate": 4.0162271805273836e-06, |
|
"loss": 1.0391, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 1.7971602434077079, |
|
"grad_norm": 0.3671875, |
|
"learning_rate": 4.0094658553076405e-06, |
|
"loss": 1.0251, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 1.7991886409736308, |
|
"grad_norm": 0.251953125, |
|
"learning_rate": 4.002704530087897e-06, |
|
"loss": 1.0301, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 1.8012170385395536, |
|
"grad_norm": 0.267578125, |
|
"learning_rate": 3.995943204868154e-06, |
|
"loss": 1.021, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 1.8032454361054766, |
|
"grad_norm": 0.361328125, |
|
"learning_rate": 3.989181879648411e-06, |
|
"loss": 1.051, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 1.8052738336713996, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 3.982420554428668e-06, |
|
"loss": 0.9883, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.8073022312373226, |
|
"grad_norm": 0.93359375, |
|
"learning_rate": 3.975659229208925e-06, |
|
"loss": 1.055, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 1.8093306288032456, |
|
"grad_norm": 0.2392578125, |
|
"learning_rate": 3.968897903989182e-06, |
|
"loss": 1.0091, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 1.8113590263691683, |
|
"grad_norm": 0.25, |
|
"learning_rate": 3.96213657876944e-06, |
|
"loss": 1.0557, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 1.8133874239350911, |
|
"grad_norm": 0.2578125, |
|
"learning_rate": 3.955375253549696e-06, |
|
"loss": 1.0326, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 1.815415821501014, |
|
"grad_norm": 0.275390625, |
|
"learning_rate": 3.948613928329953e-06, |
|
"loss": 1.0162, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 1.817444219066937, |
|
"grad_norm": 0.26171875, |
|
"learning_rate": 3.94185260311021e-06, |
|
"loss": 1.0341, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 1.81947261663286, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 3.935091277890467e-06, |
|
"loss": 1.0521, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 1.821501014198783, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 3.928329952670723e-06, |
|
"loss": 1.0544, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 1.8235294117647058, |
|
"grad_norm": 0.287109375, |
|
"learning_rate": 3.92156862745098e-06, |
|
"loss": 0.9953, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 1.8255578093306288, |
|
"grad_norm": 0.244140625, |
|
"learning_rate": 3.914807302231238e-06, |
|
"loss": 1.019, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.8275862068965516, |
|
"grad_norm": 0.26171875, |
|
"learning_rate": 3.908045977011495e-06, |
|
"loss": 0.9932, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 1.8296146044624746, |
|
"grad_norm": 0.251953125, |
|
"learning_rate": 3.901284651791751e-06, |
|
"loss": 1.0182, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 1.8316430020283976, |
|
"grad_norm": 0.27734375, |
|
"learning_rate": 3.894523326572008e-06, |
|
"loss": 1.0809, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 1.8336713995943206, |
|
"grad_norm": 0.275390625, |
|
"learning_rate": 3.887762001352266e-06, |
|
"loss": 1.0352, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 1.8356997971602436, |
|
"grad_norm": 0.337890625, |
|
"learning_rate": 3.881000676132523e-06, |
|
"loss": 1.0161, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 1.8377281947261663, |
|
"grad_norm": 0.2578125, |
|
"learning_rate": 3.874239350912779e-06, |
|
"loss": 1.0188, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 1.839756592292089, |
|
"grad_norm": 0.2412109375, |
|
"learning_rate": 3.8674780256930365e-06, |
|
"loss": 1.0352, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 1.841784989858012, |
|
"grad_norm": 0.27734375, |
|
"learning_rate": 3.860716700473293e-06, |
|
"loss": 1.0515, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 1.843813387423935, |
|
"grad_norm": 0.265625, |
|
"learning_rate": 3.85395537525355e-06, |
|
"loss": 1.074, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 1.845841784989858, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 3.847194050033806e-06, |
|
"loss": 1.0543, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.847870182555781, |
|
"grad_norm": 0.27734375, |
|
"learning_rate": 3.840432724814064e-06, |
|
"loss": 1.0243, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 1.8498985801217038, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 3.833671399594321e-06, |
|
"loss": 1.0708, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 1.8519269776876268, |
|
"grad_norm": 0.263671875, |
|
"learning_rate": 3.826910074374578e-06, |
|
"loss": 1.0486, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 1.8539553752535496, |
|
"grad_norm": 0.24609375, |
|
"learning_rate": 3.820148749154835e-06, |
|
"loss": 1.0197, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 1.8559837728194726, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 3.8133874239350913e-06, |
|
"loss": 1.0298, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 1.8580121703853956, |
|
"grad_norm": 0.2578125, |
|
"learning_rate": 3.8066260987153487e-06, |
|
"loss": 1.0123, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 1.8600405679513186, |
|
"grad_norm": 0.2578125, |
|
"learning_rate": 3.7998647734956056e-06, |
|
"loss": 0.9994, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 1.8620689655172413, |
|
"grad_norm": 0.24609375, |
|
"learning_rate": 3.793103448275862e-06, |
|
"loss": 1.0284, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 1.8640973630831643, |
|
"grad_norm": 0.287109375, |
|
"learning_rate": 3.7863421230561194e-06, |
|
"loss": 1.0786, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 1.866125760649087, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 3.7795807978363763e-06, |
|
"loss": 1.0081, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.86815415821501, |
|
"grad_norm": 0.251953125, |
|
"learning_rate": 3.7728194726166332e-06, |
|
"loss": 0.9898, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 1.870182555780933, |
|
"grad_norm": 0.2470703125, |
|
"learning_rate": 3.7660581473968897e-06, |
|
"loss": 1.0628, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 1.872210953346856, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 3.759296822177147e-06, |
|
"loss": 1.0391, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 1.874239350912779, |
|
"grad_norm": 0.240234375, |
|
"learning_rate": 3.752535496957404e-06, |
|
"loss": 1.0142, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 1.8762677484787018, |
|
"grad_norm": 0.265625, |
|
"learning_rate": 3.7457741717376613e-06, |
|
"loss": 1.046, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 1.8782961460446246, |
|
"grad_norm": 0.25, |
|
"learning_rate": 3.7390128465179178e-06, |
|
"loss": 1.0367, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 1.8803245436105476, |
|
"grad_norm": 0.26953125, |
|
"learning_rate": 3.7322515212981747e-06, |
|
"loss": 0.9889, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 1.8823529411764706, |
|
"grad_norm": 0.259765625, |
|
"learning_rate": 3.7254901960784316e-06, |
|
"loss": 1.0335, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 1.8843813387423936, |
|
"grad_norm": 0.27734375, |
|
"learning_rate": 3.718728870858689e-06, |
|
"loss": 1.0742, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 1.8864097363083165, |
|
"grad_norm": 0.248046875, |
|
"learning_rate": 3.7119675456389454e-06, |
|
"loss": 1.0103, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.8884381338742393, |
|
"grad_norm": 0.2470703125, |
|
"learning_rate": 3.7052062204192023e-06, |
|
"loss": 1.029, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 1.8904665314401623, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 3.6984448951994597e-06, |
|
"loss": 1.0185, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 1.892494929006085, |
|
"grad_norm": 0.349609375, |
|
"learning_rate": 3.6916835699797166e-06, |
|
"loss": 1.0273, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 1.894523326572008, |
|
"grad_norm": 0.279296875, |
|
"learning_rate": 3.684922244759973e-06, |
|
"loss": 1.0552, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 1.896551724137931, |
|
"grad_norm": 0.251953125, |
|
"learning_rate": 3.67816091954023e-06, |
|
"loss": 0.9835, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 1.898580121703854, |
|
"grad_norm": 0.251953125, |
|
"learning_rate": 3.6713995943204873e-06, |
|
"loss": 1.0271, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 1.900608519269777, |
|
"grad_norm": 0.2431640625, |
|
"learning_rate": 3.6646382691007442e-06, |
|
"loss": 1.0514, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 1.9026369168356998, |
|
"grad_norm": 0.263671875, |
|
"learning_rate": 3.6578769438810007e-06, |
|
"loss": 1.0763, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 1.9046653144016226, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 3.651115618661258e-06, |
|
"loss": 1.0356, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 1.9066937119675456, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 3.644354293441515e-06, |
|
"loss": 1.0892, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.9087221095334685, |
|
"grad_norm": 0.25, |
|
"learning_rate": 3.637592968221772e-06, |
|
"loss": 1.0789, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 1.9107505070993915, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 3.6308316430020284e-06, |
|
"loss": 1.0647, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 1.9127789046653145, |
|
"grad_norm": 0.26953125, |
|
"learning_rate": 3.6240703177822857e-06, |
|
"loss": 1.0863, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 1.9148073022312373, |
|
"grad_norm": 0.2470703125, |
|
"learning_rate": 3.6173089925625426e-06, |
|
"loss": 0.9983, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 1.9168356997971603, |
|
"grad_norm": 0.2578125, |
|
"learning_rate": 3.6105476673427995e-06, |
|
"loss": 0.9991, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 1.918864097363083, |
|
"grad_norm": 0.263671875, |
|
"learning_rate": 3.603786342123056e-06, |
|
"loss": 1.1201, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 1.920892494929006, |
|
"grad_norm": 0.265625, |
|
"learning_rate": 3.5970250169033134e-06, |
|
"loss": 1.0471, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 1.922920892494929, |
|
"grad_norm": 0.375, |
|
"learning_rate": 3.5902636916835703e-06, |
|
"loss": 0.9676, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 1.924949290060852, |
|
"grad_norm": 0.251953125, |
|
"learning_rate": 3.5835023664638276e-06, |
|
"loss": 1.0699, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 1.9269776876267748, |
|
"grad_norm": 0.263671875, |
|
"learning_rate": 3.576741041244084e-06, |
|
"loss": 1.0475, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.9290060851926978, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 3.569979716024341e-06, |
|
"loss": 1.0266, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 1.9310344827586206, |
|
"grad_norm": 0.2451171875, |
|
"learning_rate": 3.563218390804598e-06, |
|
"loss": 0.9755, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 1.9330628803245435, |
|
"grad_norm": 0.251953125, |
|
"learning_rate": 3.5564570655848552e-06, |
|
"loss": 1.0245, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 1.9350912778904665, |
|
"grad_norm": 0.25, |
|
"learning_rate": 3.5496957403651117e-06, |
|
"loss": 1.052, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 1.9371196754563895, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 3.5429344151453686e-06, |
|
"loss": 1.0402, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 1.9391480730223125, |
|
"grad_norm": 0.291015625, |
|
"learning_rate": 3.536173089925626e-06, |
|
"loss": 1.0447, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 1.9411764705882353, |
|
"grad_norm": 0.26171875, |
|
"learning_rate": 3.529411764705883e-06, |
|
"loss": 1.0743, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 1.943204868154158, |
|
"grad_norm": 0.26171875, |
|
"learning_rate": 3.5226504394861394e-06, |
|
"loss": 1.066, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 1.945233265720081, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 3.5158891142663963e-06, |
|
"loss": 1.0455, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 1.947261663286004, |
|
"grad_norm": 0.267578125, |
|
"learning_rate": 3.5091277890466536e-06, |
|
"loss": 1.0448, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.949290060851927, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 3.5023664638269105e-06, |
|
"loss": 1.012, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 1.95131845841785, |
|
"grad_norm": 0.376953125, |
|
"learning_rate": 3.495605138607167e-06, |
|
"loss": 0.9866, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 1.9533468559837728, |
|
"grad_norm": 0.267578125, |
|
"learning_rate": 3.4888438133874244e-06, |
|
"loss": 1.0504, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 1.9553752535496958, |
|
"grad_norm": 0.263671875, |
|
"learning_rate": 3.4820824881676813e-06, |
|
"loss": 1.0103, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 1.9574036511156185, |
|
"grad_norm": 0.265625, |
|
"learning_rate": 3.475321162947938e-06, |
|
"loss": 1.0262, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 1.9594320486815415, |
|
"grad_norm": 0.259765625, |
|
"learning_rate": 3.4685598377281947e-06, |
|
"loss": 1.0183, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 1.9614604462474645, |
|
"grad_norm": 0.2451171875, |
|
"learning_rate": 3.461798512508452e-06, |
|
"loss": 1.0283, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 1.9634888438133875, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 3.455037187288709e-06, |
|
"loss": 1.0364, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 1.9655172413793105, |
|
"grad_norm": 0.251953125, |
|
"learning_rate": 3.448275862068966e-06, |
|
"loss": 1.0066, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 1.9675456389452333, |
|
"grad_norm": 0.24609375, |
|
"learning_rate": 3.4415145368492227e-06, |
|
"loss": 1.083, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.969574036511156, |
|
"grad_norm": 0.28515625, |
|
"learning_rate": 3.4347532116294797e-06, |
|
"loss": 1.0155, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 1.971602434077079, |
|
"grad_norm": 0.263671875, |
|
"learning_rate": 3.4279918864097366e-06, |
|
"loss": 1.0288, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 1.973630831643002, |
|
"grad_norm": 0.2578125, |
|
"learning_rate": 3.421230561189994e-06, |
|
"loss": 1.0395, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 1.975659229208925, |
|
"grad_norm": 0.25, |
|
"learning_rate": 3.4144692359702504e-06, |
|
"loss": 1.0201, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 1.977687626774848, |
|
"grad_norm": 0.248046875, |
|
"learning_rate": 3.4077079107505073e-06, |
|
"loss": 1.0418, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 1.9797160243407708, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 3.400946585530764e-06, |
|
"loss": 1.0185, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 1.9817444219066938, |
|
"grad_norm": 0.25, |
|
"learning_rate": 3.3941852603110215e-06, |
|
"loss": 1.0386, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 1.9837728194726165, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 3.387423935091278e-06, |
|
"loss": 1.0045, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 1.9858012170385395, |
|
"grad_norm": 0.2470703125, |
|
"learning_rate": 3.380662609871535e-06, |
|
"loss": 1.0222, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 1.9878296146044625, |
|
"grad_norm": 0.251953125, |
|
"learning_rate": 3.3739012846517923e-06, |
|
"loss": 1.0219, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.9898580121703855, |
|
"grad_norm": 0.365234375, |
|
"learning_rate": 3.367139959432049e-06, |
|
"loss": 1.0264, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 1.9918864097363083, |
|
"grad_norm": 0.259765625, |
|
"learning_rate": 3.3603786342123057e-06, |
|
"loss": 1.0746, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 1.9939148073022313, |
|
"grad_norm": 0.2490234375, |
|
"learning_rate": 3.3536173089925626e-06, |
|
"loss": 1.0116, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 1.995943204868154, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 3.34685598377282e-06, |
|
"loss": 1.02, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 1.997971602434077, |
|
"grad_norm": 0.291015625, |
|
"learning_rate": 3.340094658553077e-06, |
|
"loss": 1.0196, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.248046875, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 1.0513, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 2.002028397565923, |
|
"grad_norm": 0.279296875, |
|
"learning_rate": 3.3265720081135907e-06, |
|
"loss": 0.9924, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 2.004056795131846, |
|
"grad_norm": 0.25, |
|
"learning_rate": 3.3198106828938476e-06, |
|
"loss": 1.0519, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 2.006085192697769, |
|
"grad_norm": 0.275390625, |
|
"learning_rate": 3.3130493576741045e-06, |
|
"loss": 1.0348, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 2.0081135902636915, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 3.306288032454361e-06, |
|
"loss": 0.9656, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 2.0101419878296145, |
|
"grad_norm": 0.27734375, |
|
"learning_rate": 3.2995267072346183e-06, |
|
"loss": 1.0121, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 2.0121703853955375, |
|
"grad_norm": 0.28125, |
|
"learning_rate": 3.2927653820148752e-06, |
|
"loss": 1.0166, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 2.0141987829614605, |
|
"grad_norm": 0.5625, |
|
"learning_rate": 3.2860040567951326e-06, |
|
"loss": 1.0339, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 2.0162271805273835, |
|
"grad_norm": 0.2734375, |
|
"learning_rate": 3.279242731575389e-06, |
|
"loss": 1.0445, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 2.0182555780933065, |
|
"grad_norm": 0.2578125, |
|
"learning_rate": 3.272481406355646e-06, |
|
"loss": 1.0673, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 2.020283975659229, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 3.265720081135903e-06, |
|
"loss": 0.9861, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 2.022312373225152, |
|
"grad_norm": 0.267578125, |
|
"learning_rate": 3.25895875591616e-06, |
|
"loss": 1.0428, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 2.024340770791075, |
|
"grad_norm": 0.259765625, |
|
"learning_rate": 3.2521974306964167e-06, |
|
"loss": 1.0235, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 2.026369168356998, |
|
"grad_norm": 0.259765625, |
|
"learning_rate": 3.2454361054766736e-06, |
|
"loss": 1.0356, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 2.028397565922921, |
|
"grad_norm": 0.25, |
|
"learning_rate": 3.2386747802569305e-06, |
|
"loss": 1.0242, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.030425963488844, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 3.231913455037188e-06, |
|
"loss": 1.0575, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 2.032454361054767, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 3.2251521298174443e-06, |
|
"loss": 1.0393, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 2.0344827586206895, |
|
"grad_norm": 0.30078125, |
|
"learning_rate": 3.2183908045977012e-06, |
|
"loss": 0.9703, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 2.0365111561866125, |
|
"grad_norm": 0.251953125, |
|
"learning_rate": 3.2116294793779586e-06, |
|
"loss": 1.0081, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 2.0385395537525355, |
|
"grad_norm": 0.271484375, |
|
"learning_rate": 3.2048681541582155e-06, |
|
"loss": 1.0917, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 2.0405679513184585, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 3.198106828938472e-06, |
|
"loss": 1.0668, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 2.0425963488843815, |
|
"grad_norm": 0.2578125, |
|
"learning_rate": 3.191345503718729e-06, |
|
"loss": 1.036, |
|
"step": 1007 |
|
}, |
|
{ |
|
"epoch": 2.0446247464503045, |
|
"grad_norm": 0.267578125, |
|
"learning_rate": 3.1845841784989862e-06, |
|
"loss": 1.0184, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 2.046653144016227, |
|
"grad_norm": 0.248046875, |
|
"learning_rate": 3.177822853279243e-06, |
|
"loss": 1.0415, |
|
"step": 1009 |
|
}, |
|
{ |
|
"epoch": 2.04868154158215, |
|
"grad_norm": 0.2490234375, |
|
"learning_rate": 3.1710615280594996e-06, |
|
"loss": 1.0184, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 2.050709939148073, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 3.164300202839757e-06, |
|
"loss": 1.0386, |
|
"step": 1011 |
|
}, |
|
{ |
|
"epoch": 2.052738336713996, |
|
"grad_norm": 0.2470703125, |
|
"learning_rate": 3.157538877620014e-06, |
|
"loss": 1.0234, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 2.054766734279919, |
|
"grad_norm": 0.25, |
|
"learning_rate": 3.1507775524002708e-06, |
|
"loss": 1.0344, |
|
"step": 1013 |
|
}, |
|
{ |
|
"epoch": 2.056795131845842, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 3.1440162271805273e-06, |
|
"loss": 1.0343, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 2.0588235294117645, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 3.1372549019607846e-06, |
|
"loss": 1.034, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 2.0608519269776875, |
|
"grad_norm": 0.259765625, |
|
"learning_rate": 3.1304935767410415e-06, |
|
"loss": 1.0121, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 2.0628803245436105, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 3.123732251521299e-06, |
|
"loss": 1.022, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 2.0649087221095335, |
|
"grad_norm": 0.388671875, |
|
"learning_rate": 3.1169709263015553e-06, |
|
"loss": 1.1013, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 2.0669371196754565, |
|
"grad_norm": 0.2451171875, |
|
"learning_rate": 3.1102096010818122e-06, |
|
"loss": 0.994, |
|
"step": 1019 |
|
}, |
|
{ |
|
"epoch": 2.0689655172413794, |
|
"grad_norm": 0.275390625, |
|
"learning_rate": 3.103448275862069e-06, |
|
"loss": 1.0659, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 2.0709939148073024, |
|
"grad_norm": 0.2470703125, |
|
"learning_rate": 3.0966869506423265e-06, |
|
"loss": 1.0322, |
|
"step": 1021 |
|
}, |
|
{ |
|
"epoch": 2.073022312373225, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 3.089925625422583e-06, |
|
"loss": 1.0494, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 2.075050709939148, |
|
"grad_norm": 0.263671875, |
|
"learning_rate": 3.08316430020284e-06, |
|
"loss": 1.0761, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 2.077079107505071, |
|
"grad_norm": 0.296875, |
|
"learning_rate": 3.0764029749830972e-06, |
|
"loss": 1.0021, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 2.079107505070994, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 3.069641649763354e-06, |
|
"loss": 0.9927, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 2.081135902636917, |
|
"grad_norm": 0.2578125, |
|
"learning_rate": 3.0628803245436106e-06, |
|
"loss": 0.999, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 2.08316430020284, |
|
"grad_norm": 0.25, |
|
"learning_rate": 3.0561189993238675e-06, |
|
"loss": 1.0619, |
|
"step": 1027 |
|
}, |
|
{ |
|
"epoch": 2.0851926977687625, |
|
"grad_norm": 0.2470703125, |
|
"learning_rate": 3.049357674104125e-06, |
|
"loss": 1.0605, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 2.0872210953346855, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 3.0425963488843818e-06, |
|
"loss": 0.971, |
|
"step": 1029 |
|
}, |
|
{ |
|
"epoch": 2.0892494929006085, |
|
"grad_norm": 0.26171875, |
|
"learning_rate": 3.0358350236646383e-06, |
|
"loss": 1.0451, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 2.0912778904665315, |
|
"grad_norm": 0.26171875, |
|
"learning_rate": 3.0290736984448956e-06, |
|
"loss": 1.0517, |
|
"step": 1031 |
|
}, |
|
{ |
|
"epoch": 2.0933062880324544, |
|
"grad_norm": 0.271484375, |
|
"learning_rate": 3.0223123732251525e-06, |
|
"loss": 1.0314, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 2.0953346855983774, |
|
"grad_norm": 0.26171875, |
|
"learning_rate": 3.0155510480054094e-06, |
|
"loss": 1.0495, |
|
"step": 1033 |
|
}, |
|
{ |
|
"epoch": 2.0973630831643, |
|
"grad_norm": 0.251953125, |
|
"learning_rate": 3.008789722785666e-06, |
|
"loss": 1.0287, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 2.099391480730223, |
|
"grad_norm": 0.26953125, |
|
"learning_rate": 3.0020283975659233e-06, |
|
"loss": 1.0541, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 2.101419878296146, |
|
"grad_norm": 0.259765625, |
|
"learning_rate": 2.99526707234618e-06, |
|
"loss": 1.045, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 2.103448275862069, |
|
"grad_norm": 0.2578125, |
|
"learning_rate": 2.988505747126437e-06, |
|
"loss": 1.0201, |
|
"step": 1037 |
|
}, |
|
{ |
|
"epoch": 2.105476673427992, |
|
"grad_norm": 0.283203125, |
|
"learning_rate": 2.9817444219066936e-06, |
|
"loss": 1.0175, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 2.107505070993915, |
|
"grad_norm": 0.25, |
|
"learning_rate": 2.974983096686951e-06, |
|
"loss": 1.0352, |
|
"step": 1039 |
|
}, |
|
{ |
|
"epoch": 2.109533468559838, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 2.968221771467208e-06, |
|
"loss": 1.0876, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 2.1115618661257605, |
|
"grad_norm": 0.359375, |
|
"learning_rate": 2.961460446247465e-06, |
|
"loss": 0.9568, |
|
"step": 1041 |
|
}, |
|
{ |
|
"epoch": 2.1135902636916835, |
|
"grad_norm": 0.3828125, |
|
"learning_rate": 2.9546991210277216e-06, |
|
"loss": 0.9813, |
|
"step": 1042 |
|
}, |
|
{ |
|
"epoch": 2.1156186612576064, |
|
"grad_norm": 0.263671875, |
|
"learning_rate": 2.9479377958079785e-06, |
|
"loss": 1.0693, |
|
"step": 1043 |
|
}, |
|
{ |
|
"epoch": 2.1176470588235294, |
|
"grad_norm": 0.251953125, |
|
"learning_rate": 2.9411764705882355e-06, |
|
"loss": 0.9901, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 2.1196754563894524, |
|
"grad_norm": 0.26171875, |
|
"learning_rate": 2.934415145368493e-06, |
|
"loss": 1.0182, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 2.1217038539553754, |
|
"grad_norm": 0.2578125, |
|
"learning_rate": 2.9276538201487493e-06, |
|
"loss": 1.0229, |
|
"step": 1046 |
|
}, |
|
{ |
|
"epoch": 2.123732251521298, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 2.920892494929006e-06, |
|
"loss": 1.06, |
|
"step": 1047 |
|
}, |
|
{ |
|
"epoch": 2.125760649087221, |
|
"grad_norm": 0.2421875, |
|
"learning_rate": 2.9141311697092635e-06, |
|
"loss": 1.0253, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 2.127789046653144, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 2.9073698444895204e-06, |
|
"loss": 1.0695, |
|
"step": 1049 |
|
}, |
|
{ |
|
"epoch": 2.129817444219067, |
|
"grad_norm": 0.296875, |
|
"learning_rate": 2.900608519269777e-06, |
|
"loss": 1.0031, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 2.13184584178499, |
|
"grad_norm": 0.296875, |
|
"learning_rate": 2.893847194050034e-06, |
|
"loss": 1.0045, |
|
"step": 1051 |
|
}, |
|
{ |
|
"epoch": 2.133874239350913, |
|
"grad_norm": 0.2490234375, |
|
"learning_rate": 2.887085868830291e-06, |
|
"loss": 1.0059, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 2.135902636916836, |
|
"grad_norm": 0.248046875, |
|
"learning_rate": 2.880324543610548e-06, |
|
"loss": 1.004, |
|
"step": 1053 |
|
}, |
|
{ |
|
"epoch": 2.1379310344827585, |
|
"grad_norm": 0.259765625, |
|
"learning_rate": 2.8735632183908046e-06, |
|
"loss": 1.0363, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 2.1399594320486814, |
|
"grad_norm": 0.279296875, |
|
"learning_rate": 2.866801893171062e-06, |
|
"loss": 1.0313, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 2.1419878296146044, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 2.860040567951319e-06, |
|
"loss": 1.0128, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 2.1440162271805274, |
|
"grad_norm": 0.28125, |
|
"learning_rate": 2.8532792427315757e-06, |
|
"loss": 1.0291, |
|
"step": 1057 |
|
}, |
|
{ |
|
"epoch": 2.1460446247464504, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 2.8465179175118322e-06, |
|
"loss": 1.0473, |
|
"step": 1058 |
|
}, |
|
{ |
|
"epoch": 2.1480730223123734, |
|
"grad_norm": 0.26171875, |
|
"learning_rate": 2.8397565922920896e-06, |
|
"loss": 1.018, |
|
"step": 1059 |
|
}, |
|
{ |
|
"epoch": 2.150101419878296, |
|
"grad_norm": 0.29296875, |
|
"learning_rate": 2.8329952670723465e-06, |
|
"loss": 1.0078, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 2.152129817444219, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 2.8262339418526034e-06, |
|
"loss": 1.0386, |
|
"step": 1061 |
|
}, |
|
{ |
|
"epoch": 2.154158215010142, |
|
"grad_norm": 0.283203125, |
|
"learning_rate": 2.8194726166328603e-06, |
|
"loss": 1.0723, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 2.156186612576065, |
|
"grad_norm": 0.2490234375, |
|
"learning_rate": 2.812711291413117e-06, |
|
"loss": 1.0031, |
|
"step": 1063 |
|
}, |
|
{ |
|
"epoch": 2.158215010141988, |
|
"grad_norm": 0.251953125, |
|
"learning_rate": 2.805949966193374e-06, |
|
"loss": 1.0709, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 2.160243407707911, |
|
"grad_norm": 0.25, |
|
"learning_rate": 2.7991886409736314e-06, |
|
"loss": 1.0254, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 2.162271805273834, |
|
"grad_norm": 0.25, |
|
"learning_rate": 2.792427315753888e-06, |
|
"loss": 0.9745, |
|
"step": 1066 |
|
}, |
|
{ |
|
"epoch": 2.1643002028397564, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 2.785665990534145e-06, |
|
"loss": 0.9904, |
|
"step": 1067 |
|
}, |
|
{ |
|
"epoch": 2.1663286004056794, |
|
"grad_norm": 0.27734375, |
|
"learning_rate": 2.7789046653144018e-06, |
|
"loss": 1.0366, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 2.1683569979716024, |
|
"grad_norm": 0.24609375, |
|
"learning_rate": 2.772143340094659e-06, |
|
"loss": 0.9916, |
|
"step": 1069 |
|
}, |
|
{ |
|
"epoch": 2.1703853955375254, |
|
"grad_norm": 0.27734375, |
|
"learning_rate": 2.7653820148749156e-06, |
|
"loss": 1.0308, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 2.1724137931034484, |
|
"grad_norm": 0.2470703125, |
|
"learning_rate": 2.7586206896551725e-06, |
|
"loss": 0.987, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 2.1744421906693714, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 2.75185936443543e-06, |
|
"loss": 1.036, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 2.176470588235294, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 2.7450980392156867e-06, |
|
"loss": 1.0158, |
|
"step": 1073 |
|
}, |
|
{ |
|
"epoch": 2.178498985801217, |
|
"grad_norm": 0.275390625, |
|
"learning_rate": 2.7383367139959432e-06, |
|
"loss": 1.0426, |
|
"step": 1074 |
|
}, |
|
{ |
|
"epoch": 2.18052738336714, |
|
"grad_norm": 0.267578125, |
|
"learning_rate": 2.7315753887762e-06, |
|
"loss": 1.0285, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 2.182555780933063, |
|
"grad_norm": 0.2578125, |
|
"learning_rate": 2.7248140635564575e-06, |
|
"loss": 0.9971, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 2.184584178498986, |
|
"grad_norm": 0.248046875, |
|
"learning_rate": 2.7180527383367144e-06, |
|
"loss": 1.0294, |
|
"step": 1077 |
|
}, |
|
{ |
|
"epoch": 2.186612576064909, |
|
"grad_norm": 0.349609375, |
|
"learning_rate": 2.711291413116971e-06, |
|
"loss": 1.017, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 2.1886409736308314, |
|
"grad_norm": 0.265625, |
|
"learning_rate": 2.704530087897228e-06, |
|
"loss": 1.0512, |
|
"step": 1079 |
|
}, |
|
{ |
|
"epoch": 2.1906693711967544, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 2.697768762677485e-06, |
|
"loss": 1.0469, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 2.1926977687626774, |
|
"grad_norm": 0.2734375, |
|
"learning_rate": 2.691007437457742e-06, |
|
"loss": 1.0847, |
|
"step": 1081 |
|
}, |
|
{ |
|
"epoch": 2.1947261663286004, |
|
"grad_norm": 0.251953125, |
|
"learning_rate": 2.6842461122379985e-06, |
|
"loss": 1.0098, |
|
"step": 1082 |
|
}, |
|
{ |
|
"epoch": 2.1967545638945234, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 2.677484787018256e-06, |
|
"loss": 1.0116, |
|
"step": 1083 |
|
}, |
|
{ |
|
"epoch": 2.1987829614604464, |
|
"grad_norm": 0.396484375, |
|
"learning_rate": 2.6707234617985128e-06, |
|
"loss": 1.0333, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 2.2008113590263694, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 2.66396213657877e-06, |
|
"loss": 0.9811, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 2.202839756592292, |
|
"grad_norm": 0.2578125, |
|
"learning_rate": 2.6572008113590266e-06, |
|
"loss": 1.0221, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 2.204868154158215, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 2.6504394861392835e-06, |
|
"loss": 1.0477, |
|
"step": 1087 |
|
}, |
|
{ |
|
"epoch": 2.206896551724138, |
|
"grad_norm": 0.2470703125, |
|
"learning_rate": 2.6436781609195404e-06, |
|
"loss": 1.0179, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 2.208924949290061, |
|
"grad_norm": 0.427734375, |
|
"learning_rate": 2.6369168356997977e-06, |
|
"loss": 0.9656, |
|
"step": 1089 |
|
}, |
|
{ |
|
"epoch": 2.210953346855984, |
|
"grad_norm": 0.271484375, |
|
"learning_rate": 2.6301555104800542e-06, |
|
"loss": 0.9941, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 2.212981744421907, |
|
"grad_norm": 0.2470703125, |
|
"learning_rate": 2.623394185260311e-06, |
|
"loss": 0.9892, |
|
"step": 1091 |
|
}, |
|
{ |
|
"epoch": 2.2150101419878294, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 2.616632860040568e-06, |
|
"loss": 1.0165, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 2.2170385395537524, |
|
"grad_norm": 0.2490234375, |
|
"learning_rate": 2.6098715348208254e-06, |
|
"loss": 1.0287, |
|
"step": 1093 |
|
}, |
|
{ |
|
"epoch": 2.2190669371196754, |
|
"grad_norm": 0.25, |
|
"learning_rate": 2.603110209601082e-06, |
|
"loss": 1.0269, |
|
"step": 1094 |
|
}, |
|
{ |
|
"epoch": 2.2210953346855984, |
|
"grad_norm": 0.25, |
|
"learning_rate": 2.596348884381339e-06, |
|
"loss": 1.0034, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 2.2231237322515214, |
|
"grad_norm": 0.251953125, |
|
"learning_rate": 2.589587559161596e-06, |
|
"loss": 1.024, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 2.2251521298174444, |
|
"grad_norm": 0.26171875, |
|
"learning_rate": 2.582826233941853e-06, |
|
"loss": 1.0846, |
|
"step": 1097 |
|
}, |
|
{ |
|
"epoch": 2.227180527383367, |
|
"grad_norm": 0.279296875, |
|
"learning_rate": 2.5760649087221095e-06, |
|
"loss": 1.0558, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 2.22920892494929, |
|
"grad_norm": 0.251953125, |
|
"learning_rate": 2.5693035835023664e-06, |
|
"loss": 0.9899, |
|
"step": 1099 |
|
}, |
|
{ |
|
"epoch": 2.231237322515213, |
|
"grad_norm": 0.26171875, |
|
"learning_rate": 2.5625422582826238e-06, |
|
"loss": 1.0611, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.233265720081136, |
|
"grad_norm": 0.263671875, |
|
"learning_rate": 2.5557809330628807e-06, |
|
"loss": 1.0497, |
|
"step": 1101 |
|
}, |
|
{ |
|
"epoch": 2.235294117647059, |
|
"grad_norm": 0.28515625, |
|
"learning_rate": 2.549019607843137e-06, |
|
"loss": 1.037, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 2.237322515212982, |
|
"grad_norm": 0.25, |
|
"learning_rate": 2.5422582826233945e-06, |
|
"loss": 1.0295, |
|
"step": 1103 |
|
}, |
|
{ |
|
"epoch": 2.239350912778905, |
|
"grad_norm": 0.2490234375, |
|
"learning_rate": 2.5354969574036514e-06, |
|
"loss": 1.0433, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 2.2413793103448274, |
|
"grad_norm": 0.267578125, |
|
"learning_rate": 2.5287356321839083e-06, |
|
"loss": 1.002, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 2.2434077079107504, |
|
"grad_norm": 0.251953125, |
|
"learning_rate": 2.521974306964165e-06, |
|
"loss": 1.0088, |
|
"step": 1106 |
|
}, |
|
{ |
|
"epoch": 2.2454361054766734, |
|
"grad_norm": 0.353515625, |
|
"learning_rate": 2.515212981744422e-06, |
|
"loss": 0.9883, |
|
"step": 1107 |
|
}, |
|
{ |
|
"epoch": 2.2474645030425964, |
|
"grad_norm": 0.2578125, |
|
"learning_rate": 2.508451656524679e-06, |
|
"loss": 1.0244, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 2.2494929006085194, |
|
"grad_norm": 0.28515625, |
|
"learning_rate": 2.5016903313049364e-06, |
|
"loss": 1.0374, |
|
"step": 1109 |
|
}, |
|
{ |
|
"epoch": 2.2515212981744424, |
|
"grad_norm": 0.26953125, |
|
"learning_rate": 2.494929006085193e-06, |
|
"loss": 1.0797, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 2.2535496957403653, |
|
"grad_norm": 0.263671875, |
|
"learning_rate": 2.48816768086545e-06, |
|
"loss": 1.0207, |
|
"step": 1111 |
|
}, |
|
{ |
|
"epoch": 2.255578093306288, |
|
"grad_norm": 0.2578125, |
|
"learning_rate": 2.4814063556457067e-06, |
|
"loss": 1.0208, |
|
"step": 1112 |
|
}, |
|
{ |
|
"epoch": 2.257606490872211, |
|
"grad_norm": 0.248046875, |
|
"learning_rate": 2.4746450304259636e-06, |
|
"loss": 1.0544, |
|
"step": 1113 |
|
}, |
|
{ |
|
"epoch": 2.259634888438134, |
|
"grad_norm": 0.353515625, |
|
"learning_rate": 2.4678837052062205e-06, |
|
"loss": 0.9771, |
|
"step": 1114 |
|
}, |
|
{ |
|
"epoch": 2.261663286004057, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 2.4611223799864774e-06, |
|
"loss": 1.0019, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 2.26369168356998, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 2.4543610547667348e-06, |
|
"loss": 1.0274, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 2.2657200811359024, |
|
"grad_norm": 0.291015625, |
|
"learning_rate": 2.4475997295469913e-06, |
|
"loss": 1.0097, |
|
"step": 1117 |
|
}, |
|
{ |
|
"epoch": 2.2677484787018254, |
|
"grad_norm": 0.279296875, |
|
"learning_rate": 2.4408384043272486e-06, |
|
"loss": 1.0111, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 2.2697768762677484, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 2.434077079107505e-06, |
|
"loss": 1.0339, |
|
"step": 1119 |
|
}, |
|
{ |
|
"epoch": 2.2718052738336714, |
|
"grad_norm": 0.26953125, |
|
"learning_rate": 2.4273157538877624e-06, |
|
"loss": 1.0308, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 2.2738336713995944, |
|
"grad_norm": 0.337890625, |
|
"learning_rate": 2.420554428668019e-06, |
|
"loss": 1.0253, |
|
"step": 1121 |
|
}, |
|
{ |
|
"epoch": 2.2758620689655173, |
|
"grad_norm": 0.259765625, |
|
"learning_rate": 2.4137931034482762e-06, |
|
"loss": 1.0244, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 2.2778904665314403, |
|
"grad_norm": 0.375, |
|
"learning_rate": 2.407031778228533e-06, |
|
"loss": 1.014, |
|
"step": 1123 |
|
}, |
|
{ |
|
"epoch": 2.279918864097363, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 2.40027045300879e-06, |
|
"loss": 1.037, |
|
"step": 1124 |
|
}, |
|
{ |
|
"epoch": 2.281947261663286, |
|
"grad_norm": 0.259765625, |
|
"learning_rate": 2.393509127789047e-06, |
|
"loss": 1.0582, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 2.283975659229209, |
|
"grad_norm": 0.369140625, |
|
"learning_rate": 2.386747802569304e-06, |
|
"loss": 1.0285, |
|
"step": 1126 |
|
}, |
|
{ |
|
"epoch": 2.286004056795132, |
|
"grad_norm": 0.2490234375, |
|
"learning_rate": 2.379986477349561e-06, |
|
"loss": 1.0411, |
|
"step": 1127 |
|
}, |
|
{ |
|
"epoch": 2.288032454361055, |
|
"grad_norm": 0.267578125, |
|
"learning_rate": 2.3732251521298177e-06, |
|
"loss": 1.0125, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 2.290060851926978, |
|
"grad_norm": 0.291015625, |
|
"learning_rate": 2.3664638269100746e-06, |
|
"loss": 1.0455, |
|
"step": 1129 |
|
}, |
|
{ |
|
"epoch": 2.292089249492901, |
|
"grad_norm": 0.271484375, |
|
"learning_rate": 2.3597025016903315e-06, |
|
"loss": 1.0079, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 2.2941176470588234, |
|
"grad_norm": 0.2470703125, |
|
"learning_rate": 2.3529411764705885e-06, |
|
"loss": 1.0286, |
|
"step": 1131 |
|
}, |
|
{ |
|
"epoch": 2.2961460446247464, |
|
"grad_norm": 0.24609375, |
|
"learning_rate": 2.3461798512508454e-06, |
|
"loss": 1.0324, |
|
"step": 1132 |
|
}, |
|
{ |
|
"epoch": 2.2981744421906694, |
|
"grad_norm": 0.271484375, |
|
"learning_rate": 2.3394185260311023e-06, |
|
"loss": 1.0273, |
|
"step": 1133 |
|
}, |
|
{ |
|
"epoch": 2.3002028397565923, |
|
"grad_norm": 0.25, |
|
"learning_rate": 2.332657200811359e-06, |
|
"loss": 1.0111, |
|
"step": 1134 |
|
}, |
|
{ |
|
"epoch": 2.3022312373225153, |
|
"grad_norm": 0.4609375, |
|
"learning_rate": 2.325895875591616e-06, |
|
"loss": 0.9797, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 2.3042596348884383, |
|
"grad_norm": 0.271484375, |
|
"learning_rate": 2.319134550371873e-06, |
|
"loss": 0.9904, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 2.306288032454361, |
|
"grad_norm": 0.251953125, |
|
"learning_rate": 2.31237322515213e-06, |
|
"loss": 1.0559, |
|
"step": 1137 |
|
}, |
|
{ |
|
"epoch": 2.308316430020284, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 2.305611899932387e-06, |
|
"loss": 1.0311, |
|
"step": 1138 |
|
}, |
|
{ |
|
"epoch": 2.310344827586207, |
|
"grad_norm": 0.26953125, |
|
"learning_rate": 2.2988505747126437e-06, |
|
"loss": 1.0926, |
|
"step": 1139 |
|
}, |
|
{ |
|
"epoch": 2.31237322515213, |
|
"grad_norm": 0.26171875, |
|
"learning_rate": 2.292089249492901e-06, |
|
"loss": 1.0408, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 2.314401622718053, |
|
"grad_norm": 0.265625, |
|
"learning_rate": 2.2853279242731576e-06, |
|
"loss": 1.0569, |
|
"step": 1141 |
|
}, |
|
{ |
|
"epoch": 2.316430020283976, |
|
"grad_norm": 0.26953125, |
|
"learning_rate": 2.278566599053415e-06, |
|
"loss": 1.0491, |
|
"step": 1142 |
|
}, |
|
{ |
|
"epoch": 2.3184584178498984, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 2.2718052738336714e-06, |
|
"loss": 1.0171, |
|
"step": 1143 |
|
}, |
|
{ |
|
"epoch": 2.3204868154158214, |
|
"grad_norm": 0.26953125, |
|
"learning_rate": 2.2650439486139287e-06, |
|
"loss": 1.0477, |
|
"step": 1144 |
|
}, |
|
{ |
|
"epoch": 2.3225152129817443, |
|
"grad_norm": 0.26953125, |
|
"learning_rate": 2.2582826233941852e-06, |
|
"loss": 1.0472, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 2.3245436105476673, |
|
"grad_norm": 0.267578125, |
|
"learning_rate": 2.2515212981744425e-06, |
|
"loss": 1.0426, |
|
"step": 1146 |
|
}, |
|
{ |
|
"epoch": 2.3265720081135903, |
|
"grad_norm": 0.263671875, |
|
"learning_rate": 2.2447599729546995e-06, |
|
"loss": 1.0696, |
|
"step": 1147 |
|
}, |
|
{ |
|
"epoch": 2.3286004056795133, |
|
"grad_norm": 0.267578125, |
|
"learning_rate": 2.2379986477349564e-06, |
|
"loss": 1.0435, |
|
"step": 1148 |
|
}, |
|
{ |
|
"epoch": 2.3306288032454363, |
|
"grad_norm": 0.267578125, |
|
"learning_rate": 2.2312373225152133e-06, |
|
"loss": 1.0468, |
|
"step": 1149 |
|
}, |
|
{ |
|
"epoch": 2.332657200811359, |
|
"grad_norm": 0.2490234375, |
|
"learning_rate": 2.22447599729547e-06, |
|
"loss": 1.013, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 2.334685598377282, |
|
"grad_norm": 0.265625, |
|
"learning_rate": 2.217714672075727e-06, |
|
"loss": 1.0125, |
|
"step": 1151 |
|
}, |
|
{ |
|
"epoch": 2.336713995943205, |
|
"grad_norm": 0.26171875, |
|
"learning_rate": 2.210953346855984e-06, |
|
"loss": 1.0292, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 2.338742393509128, |
|
"grad_norm": 0.52734375, |
|
"learning_rate": 2.204192021636241e-06, |
|
"loss": 0.9943, |
|
"step": 1153 |
|
}, |
|
{ |
|
"epoch": 2.340770791075051, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 2.197430696416498e-06, |
|
"loss": 1.0632, |
|
"step": 1154 |
|
}, |
|
{ |
|
"epoch": 2.342799188640974, |
|
"grad_norm": 0.2490234375, |
|
"learning_rate": 2.1906693711967548e-06, |
|
"loss": 0.9995, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 2.344827586206897, |
|
"grad_norm": 0.2451171875, |
|
"learning_rate": 2.1839080459770117e-06, |
|
"loss": 0.983, |
|
"step": 1156 |
|
}, |
|
{ |
|
"epoch": 2.3468559837728193, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 2.1771467207572686e-06, |
|
"loss": 1.003, |
|
"step": 1157 |
|
}, |
|
{ |
|
"epoch": 2.3488843813387423, |
|
"grad_norm": 0.26171875, |
|
"learning_rate": 2.1703853955375255e-06, |
|
"loss": 1.0498, |
|
"step": 1158 |
|
}, |
|
{ |
|
"epoch": 2.3509127789046653, |
|
"grad_norm": 0.28125, |
|
"learning_rate": 2.1636240703177824e-06, |
|
"loss": 1.0288, |
|
"step": 1159 |
|
}, |
|
{ |
|
"epoch": 2.3529411764705883, |
|
"grad_norm": 0.2578125, |
|
"learning_rate": 2.1568627450980393e-06, |
|
"loss": 1.0166, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 2.3549695740365113, |
|
"grad_norm": 0.259765625, |
|
"learning_rate": 2.1501014198782962e-06, |
|
"loss": 1.0634, |
|
"step": 1161 |
|
}, |
|
{ |
|
"epoch": 2.356997971602434, |
|
"grad_norm": 0.291015625, |
|
"learning_rate": 2.1433400946585536e-06, |
|
"loss": 1.0077, |
|
"step": 1162 |
|
}, |
|
{ |
|
"epoch": 2.359026369168357, |
|
"grad_norm": 0.263671875, |
|
"learning_rate": 2.13657876943881e-06, |
|
"loss": 1.0331, |
|
"step": 1163 |
|
}, |
|
{ |
|
"epoch": 2.36105476673428, |
|
"grad_norm": 0.259765625, |
|
"learning_rate": 2.1298174442190674e-06, |
|
"loss": 0.987, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 2.363083164300203, |
|
"grad_norm": 0.25, |
|
"learning_rate": 2.123056118999324e-06, |
|
"loss": 1.0156, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 2.365111561866126, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 2.116294793779581e-06, |
|
"loss": 0.9946, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 2.367139959432049, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 2.1095334685598377e-06, |
|
"loss": 0.9646, |
|
"step": 1167 |
|
}, |
|
{ |
|
"epoch": 2.369168356997972, |
|
"grad_norm": 0.267578125, |
|
"learning_rate": 2.102772143340095e-06, |
|
"loss": 1.0295, |
|
"step": 1168 |
|
}, |
|
{ |
|
"epoch": 2.3711967545638943, |
|
"grad_norm": 0.259765625, |
|
"learning_rate": 2.096010818120352e-06, |
|
"loss": 1.0657, |
|
"step": 1169 |
|
}, |
|
{ |
|
"epoch": 2.3732251521298173, |
|
"grad_norm": 0.275390625, |
|
"learning_rate": 2.089249492900609e-06, |
|
"loss": 1.0251, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 2.3752535496957403, |
|
"grad_norm": 0.26171875, |
|
"learning_rate": 2.0824881676808658e-06, |
|
"loss": 1.0299, |
|
"step": 1171 |
|
}, |
|
{ |
|
"epoch": 2.3772819472616633, |
|
"grad_norm": 0.265625, |
|
"learning_rate": 2.0757268424611227e-06, |
|
"loss": 1.0147, |
|
"step": 1172 |
|
}, |
|
{ |
|
"epoch": 2.3793103448275863, |
|
"grad_norm": 0.2734375, |
|
"learning_rate": 2.0689655172413796e-06, |
|
"loss": 0.9979, |
|
"step": 1173 |
|
}, |
|
{ |
|
"epoch": 2.3813387423935093, |
|
"grad_norm": 0.265625, |
|
"learning_rate": 2.0622041920216365e-06, |
|
"loss": 1.0701, |
|
"step": 1174 |
|
}, |
|
{ |
|
"epoch": 2.3833671399594323, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 2.0554428668018934e-06, |
|
"loss": 1.0585, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 2.385395537525355, |
|
"grad_norm": 0.287109375, |
|
"learning_rate": 2.0486815415821503e-06, |
|
"loss": 1.0384, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 2.387423935091278, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 2.0419202163624072e-06, |
|
"loss": 1.0231, |
|
"step": 1177 |
|
}, |
|
{ |
|
"epoch": 2.389452332657201, |
|
"grad_norm": 0.2470703125, |
|
"learning_rate": 2.035158891142664e-06, |
|
"loss": 0.9854, |
|
"step": 1178 |
|
}, |
|
{ |
|
"epoch": 2.391480730223124, |
|
"grad_norm": 0.26953125, |
|
"learning_rate": 2.028397565922921e-06, |
|
"loss": 0.9973, |
|
"step": 1179 |
|
}, |
|
{ |
|
"epoch": 2.393509127789047, |
|
"grad_norm": 0.37109375, |
|
"learning_rate": 2.021636240703178e-06, |
|
"loss": 1.0059, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 2.3955375253549693, |
|
"grad_norm": 0.259765625, |
|
"learning_rate": 2.014874915483435e-06, |
|
"loss": 1.0267, |
|
"step": 1181 |
|
}, |
|
{ |
|
"epoch": 2.3975659229208923, |
|
"grad_norm": 0.275390625, |
|
"learning_rate": 2.0081135902636918e-06, |
|
"loss": 1.015, |
|
"step": 1182 |
|
}, |
|
{ |
|
"epoch": 2.3995943204868153, |
|
"grad_norm": 0.267578125, |
|
"learning_rate": 2.0013522650439487e-06, |
|
"loss": 1.0269, |
|
"step": 1183 |
|
}, |
|
{ |
|
"epoch": 2.4016227180527383, |
|
"grad_norm": 0.28515625, |
|
"learning_rate": 1.9945909398242056e-06, |
|
"loss": 1.0155, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 2.4036511156186613, |
|
"grad_norm": 0.267578125, |
|
"learning_rate": 1.9878296146044625e-06, |
|
"loss": 1.0457, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 2.4056795131845843, |
|
"grad_norm": 0.251953125, |
|
"learning_rate": 1.98106828938472e-06, |
|
"loss": 0.9996, |
|
"step": 1186 |
|
}, |
|
{ |
|
"epoch": 2.4077079107505073, |
|
"grad_norm": 0.2578125, |
|
"learning_rate": 1.9743069641649763e-06, |
|
"loss": 1.0397, |
|
"step": 1187 |
|
}, |
|
{ |
|
"epoch": 2.40973630831643, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 1.9675456389452337e-06, |
|
"loss": 1.0423, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 2.411764705882353, |
|
"grad_norm": 0.259765625, |
|
"learning_rate": 1.96078431372549e-06, |
|
"loss": 1.0214, |
|
"step": 1189 |
|
}, |
|
{ |
|
"epoch": 2.413793103448276, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 1.9540229885057475e-06, |
|
"loss": 0.9866, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 2.415821501014199, |
|
"grad_norm": 0.267578125, |
|
"learning_rate": 1.947261663286004e-06, |
|
"loss": 1.045, |
|
"step": 1191 |
|
}, |
|
{ |
|
"epoch": 2.417849898580122, |
|
"grad_norm": 0.2490234375, |
|
"learning_rate": 1.9405003380662613e-06, |
|
"loss": 1.0609, |
|
"step": 1192 |
|
}, |
|
{ |
|
"epoch": 2.4198782961460448, |
|
"grad_norm": 0.2578125, |
|
"learning_rate": 1.9337390128465182e-06, |
|
"loss": 1.0639, |
|
"step": 1193 |
|
}, |
|
{ |
|
"epoch": 2.4219066937119678, |
|
"grad_norm": 0.283203125, |
|
"learning_rate": 1.926977687626775e-06, |
|
"loss": 1.0107, |
|
"step": 1194 |
|
}, |
|
{ |
|
"epoch": 2.4239350912778903, |
|
"grad_norm": 0.283203125, |
|
"learning_rate": 1.920216362407032e-06, |
|
"loss": 1.041, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 2.4259634888438133, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 1.913455037187289e-06, |
|
"loss": 1.0544, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 2.4279918864097363, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 1.9066937119675457e-06, |
|
"loss": 1.0184, |
|
"step": 1197 |
|
}, |
|
{ |
|
"epoch": 2.4300202839756593, |
|
"grad_norm": 0.28515625, |
|
"learning_rate": 1.8999323867478028e-06, |
|
"loss": 1.0371, |
|
"step": 1198 |
|
}, |
|
{ |
|
"epoch": 2.4320486815415823, |
|
"grad_norm": 0.265625, |
|
"learning_rate": 1.8931710615280597e-06, |
|
"loss": 1.0313, |
|
"step": 1199 |
|
}, |
|
{ |
|
"epoch": 2.4340770791075053, |
|
"grad_norm": 0.27734375, |
|
"learning_rate": 1.8864097363083166e-06, |
|
"loss": 1.0275, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.436105476673428, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 1.8796484110885735e-06, |
|
"loss": 1.0283, |
|
"step": 1201 |
|
}, |
|
{ |
|
"epoch": 2.438133874239351, |
|
"grad_norm": 0.25, |
|
"learning_rate": 1.8728870858688306e-06, |
|
"loss": 1.0195, |
|
"step": 1202 |
|
}, |
|
{ |
|
"epoch": 2.440162271805274, |
|
"grad_norm": 0.25, |
|
"learning_rate": 1.8661257606490873e-06, |
|
"loss": 1.0568, |
|
"step": 1203 |
|
}, |
|
{ |
|
"epoch": 2.4421906693711968, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 1.8593644354293445e-06, |
|
"loss": 1.0432, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 2.4442190669371198, |
|
"grad_norm": 0.3671875, |
|
"learning_rate": 1.8526031102096012e-06, |
|
"loss": 1.0448, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 2.4462474645030428, |
|
"grad_norm": 0.267578125, |
|
"learning_rate": 1.8458417849898583e-06, |
|
"loss": 1.0647, |
|
"step": 1206 |
|
}, |
|
{ |
|
"epoch": 2.4482758620689653, |
|
"grad_norm": 0.263671875, |
|
"learning_rate": 1.839080459770115e-06, |
|
"loss": 1.0329, |
|
"step": 1207 |
|
}, |
|
{ |
|
"epoch": 2.4503042596348883, |
|
"grad_norm": 0.2578125, |
|
"learning_rate": 1.8323191345503721e-06, |
|
"loss": 1.0383, |
|
"step": 1208 |
|
}, |
|
{ |
|
"epoch": 2.4523326572008113, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 1.825557809330629e-06, |
|
"loss": 1.0725, |
|
"step": 1209 |
|
}, |
|
{ |
|
"epoch": 2.4543610547667343, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 1.818796484110886e-06, |
|
"loss": 1.0147, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 2.4563894523326573, |
|
"grad_norm": 0.2578125, |
|
"learning_rate": 1.8120351588911429e-06, |
|
"loss": 1.0861, |
|
"step": 1211 |
|
}, |
|
{ |
|
"epoch": 2.4584178498985803, |
|
"grad_norm": 0.26171875, |
|
"learning_rate": 1.8052738336713998e-06, |
|
"loss": 1.0131, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 2.4604462474645032, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 1.7985125084516567e-06, |
|
"loss": 1.0415, |
|
"step": 1213 |
|
}, |
|
{ |
|
"epoch": 2.462474645030426, |
|
"grad_norm": 0.259765625, |
|
"learning_rate": 1.7917511832319138e-06, |
|
"loss": 0.9922, |
|
"step": 1214 |
|
}, |
|
{ |
|
"epoch": 2.464503042596349, |
|
"grad_norm": 0.265625, |
|
"learning_rate": 1.7849898580121705e-06, |
|
"loss": 1.0788, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 2.4665314401622718, |
|
"grad_norm": 0.2578125, |
|
"learning_rate": 1.7782285327924276e-06, |
|
"loss": 1.0135, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 2.4685598377281948, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 1.7714672075726843e-06, |
|
"loss": 1.0231, |
|
"step": 1217 |
|
}, |
|
{ |
|
"epoch": 2.4705882352941178, |
|
"grad_norm": 0.2890625, |
|
"learning_rate": 1.7647058823529414e-06, |
|
"loss": 1.0446, |
|
"step": 1218 |
|
}, |
|
{ |
|
"epoch": 2.4726166328600407, |
|
"grad_norm": 0.248046875, |
|
"learning_rate": 1.7579445571331981e-06, |
|
"loss": 1.0019, |
|
"step": 1219 |
|
}, |
|
{ |
|
"epoch": 2.4746450304259637, |
|
"grad_norm": 0.302734375, |
|
"learning_rate": 1.7511832319134553e-06, |
|
"loss": 0.9993, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 2.4766734279918863, |
|
"grad_norm": 0.2734375, |
|
"learning_rate": 1.7444219066937122e-06, |
|
"loss": 0.9909, |
|
"step": 1221 |
|
}, |
|
{ |
|
"epoch": 2.4787018255578093, |
|
"grad_norm": 0.26953125, |
|
"learning_rate": 1.737660581473969e-06, |
|
"loss": 1.0211, |
|
"step": 1222 |
|
}, |
|
{ |
|
"epoch": 2.4807302231237323, |
|
"grad_norm": 0.275390625, |
|
"learning_rate": 1.730899256254226e-06, |
|
"loss": 1.0205, |
|
"step": 1223 |
|
}, |
|
{ |
|
"epoch": 2.4827586206896552, |
|
"grad_norm": 0.37109375, |
|
"learning_rate": 1.724137931034483e-06, |
|
"loss": 1.0238, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 2.4847870182555782, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 1.7173766058147398e-06, |
|
"loss": 0.9695, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 2.486815415821501, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 1.710615280594997e-06, |
|
"loss": 0.9944, |
|
"step": 1226 |
|
}, |
|
{ |
|
"epoch": 2.4888438133874238, |
|
"grad_norm": 0.251953125, |
|
"learning_rate": 1.7038539553752536e-06, |
|
"loss": 1.0094, |
|
"step": 1227 |
|
}, |
|
{ |
|
"epoch": 2.4908722109533468, |
|
"grad_norm": 0.265625, |
|
"learning_rate": 1.6970926301555108e-06, |
|
"loss": 1.0162, |
|
"step": 1228 |
|
}, |
|
{ |
|
"epoch": 2.4929006085192698, |
|
"grad_norm": 0.263671875, |
|
"learning_rate": 1.6903313049357675e-06, |
|
"loss": 1.0123, |
|
"step": 1229 |
|
}, |
|
{ |
|
"epoch": 2.4949290060851927, |
|
"grad_norm": 0.2490234375, |
|
"learning_rate": 1.6835699797160246e-06, |
|
"loss": 1.0224, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 2.4969574036511157, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 1.6768086544962813e-06, |
|
"loss": 0.9803, |
|
"step": 1231 |
|
}, |
|
{ |
|
"epoch": 2.4989858012170387, |
|
"grad_norm": 0.259765625, |
|
"learning_rate": 1.6700473292765384e-06, |
|
"loss": 1.0051, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 2.5010141987829613, |
|
"grad_norm": 0.2578125, |
|
"learning_rate": 1.6632860040567953e-06, |
|
"loss": 1.0288, |
|
"step": 1233 |
|
}, |
|
{ |
|
"epoch": 2.5030425963488843, |
|
"grad_norm": 0.25, |
|
"learning_rate": 1.6565246788370522e-06, |
|
"loss": 1.0262, |
|
"step": 1234 |
|
}, |
|
{ |
|
"epoch": 2.5050709939148073, |
|
"grad_norm": 0.294921875, |
|
"learning_rate": 1.6497633536173092e-06, |
|
"loss": 1.0119, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 2.5070993914807302, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 1.6430020283975663e-06, |
|
"loss": 1.0361, |
|
"step": 1236 |
|
}, |
|
{ |
|
"epoch": 2.5091277890466532, |
|
"grad_norm": 0.30078125, |
|
"learning_rate": 1.636240703177823e-06, |
|
"loss": 0.9858, |
|
"step": 1237 |
|
}, |
|
{ |
|
"epoch": 2.5111561866125762, |
|
"grad_norm": 0.259765625, |
|
"learning_rate": 1.62947937795808e-06, |
|
"loss": 1.0318, |
|
"step": 1238 |
|
}, |
|
{ |
|
"epoch": 2.513184584178499, |
|
"grad_norm": 0.244140625, |
|
"learning_rate": 1.6227180527383368e-06, |
|
"loss": 1.0007, |
|
"step": 1239 |
|
}, |
|
{ |
|
"epoch": 2.5152129817444218, |
|
"grad_norm": 0.248046875, |
|
"learning_rate": 1.615956727518594e-06, |
|
"loss": 1.0325, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 2.5172413793103448, |
|
"grad_norm": 0.302734375, |
|
"learning_rate": 1.6091954022988506e-06, |
|
"loss": 1.0019, |
|
"step": 1241 |
|
}, |
|
{ |
|
"epoch": 2.5192697768762677, |
|
"grad_norm": 0.337890625, |
|
"learning_rate": 1.6024340770791077e-06, |
|
"loss": 1.0205, |
|
"step": 1242 |
|
}, |
|
{ |
|
"epoch": 2.5212981744421907, |
|
"grad_norm": 0.283203125, |
|
"learning_rate": 1.5956727518593644e-06, |
|
"loss": 1.0035, |
|
"step": 1243 |
|
}, |
|
{ |
|
"epoch": 2.5233265720081137, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 1.5889114266396216e-06, |
|
"loss": 1.0094, |
|
"step": 1244 |
|
}, |
|
{ |
|
"epoch": 2.5253549695740363, |
|
"grad_norm": 0.294921875, |
|
"learning_rate": 1.5821501014198785e-06, |
|
"loss": 1.0204, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 2.5273833671399597, |
|
"grad_norm": 0.298828125, |
|
"learning_rate": 1.5753887762001354e-06, |
|
"loss": 0.9897, |
|
"step": 1246 |
|
}, |
|
{ |
|
"epoch": 2.5294117647058822, |
|
"grad_norm": 0.2578125, |
|
"learning_rate": 1.5686274509803923e-06, |
|
"loss": 1.051, |
|
"step": 1247 |
|
}, |
|
{ |
|
"epoch": 2.5314401622718052, |
|
"grad_norm": 0.2734375, |
|
"learning_rate": 1.5618661257606494e-06, |
|
"loss": 0.9939, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 2.5334685598377282, |
|
"grad_norm": 0.2578125, |
|
"learning_rate": 1.5551048005409061e-06, |
|
"loss": 1.0292, |
|
"step": 1249 |
|
}, |
|
{ |
|
"epoch": 2.535496957403651, |
|
"grad_norm": 0.388671875, |
|
"learning_rate": 1.5483434753211632e-06, |
|
"loss": 1.1074, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 2.537525354969574, |
|
"grad_norm": 0.412109375, |
|
"learning_rate": 1.54158215010142e-06, |
|
"loss": 0.9867, |
|
"step": 1251 |
|
}, |
|
{ |
|
"epoch": 2.5395537525354968, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 1.534820824881677e-06, |
|
"loss": 1.0396, |
|
"step": 1252 |
|
}, |
|
{ |
|
"epoch": 2.5415821501014197, |
|
"grad_norm": 0.28125, |
|
"learning_rate": 1.5280594996619338e-06, |
|
"loss": 0.9953, |
|
"step": 1253 |
|
}, |
|
{ |
|
"epoch": 2.5436105476673427, |
|
"grad_norm": 0.279296875, |
|
"learning_rate": 1.5212981744421909e-06, |
|
"loss": 1.0594, |
|
"step": 1254 |
|
}, |
|
{ |
|
"epoch": 2.5456389452332657, |
|
"grad_norm": 0.296875, |
|
"learning_rate": 1.5145368492224478e-06, |
|
"loss": 1.0398, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 2.5476673427991887, |
|
"grad_norm": 0.25, |
|
"learning_rate": 1.5077755240027047e-06, |
|
"loss": 1.0247, |
|
"step": 1256 |
|
}, |
|
{ |
|
"epoch": 2.5496957403651117, |
|
"grad_norm": 0.263671875, |
|
"learning_rate": 1.5010141987829616e-06, |
|
"loss": 1.003, |
|
"step": 1257 |
|
}, |
|
{ |
|
"epoch": 2.5517241379310347, |
|
"grad_norm": 0.28515625, |
|
"learning_rate": 1.4942528735632185e-06, |
|
"loss": 1.0286, |
|
"step": 1258 |
|
}, |
|
{ |
|
"epoch": 2.5537525354969572, |
|
"grad_norm": 0.25, |
|
"learning_rate": 1.4874915483434755e-06, |
|
"loss": 1.0062, |
|
"step": 1259 |
|
}, |
|
{ |
|
"epoch": 2.5557809330628802, |
|
"grad_norm": 0.2490234375, |
|
"learning_rate": 1.4807302231237326e-06, |
|
"loss": 1.0012, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 2.5578093306288032, |
|
"grad_norm": 0.251953125, |
|
"learning_rate": 1.4739688979039893e-06, |
|
"loss": 1.0322, |
|
"step": 1261 |
|
}, |
|
{ |
|
"epoch": 2.559837728194726, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 1.4672075726842464e-06, |
|
"loss": 1.0477, |
|
"step": 1262 |
|
}, |
|
{ |
|
"epoch": 2.561866125760649, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 1.460446247464503e-06, |
|
"loss": 1.0406, |
|
"step": 1263 |
|
}, |
|
{ |
|
"epoch": 2.5638945233265718, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 1.4536849222447602e-06, |
|
"loss": 1.0073, |
|
"step": 1264 |
|
}, |
|
{ |
|
"epoch": 2.565922920892495, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 1.446923597025017e-06, |
|
"loss": 1.0306, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 2.5679513184584177, |
|
"grad_norm": 0.28125, |
|
"learning_rate": 1.440162271805274e-06, |
|
"loss": 0.9963, |
|
"step": 1266 |
|
}, |
|
{ |
|
"epoch": 2.5699797160243407, |
|
"grad_norm": 0.26953125, |
|
"learning_rate": 1.433400946585531e-06, |
|
"loss": 0.9997, |
|
"step": 1267 |
|
}, |
|
{ |
|
"epoch": 2.5720081135902637, |
|
"grad_norm": 0.384765625, |
|
"learning_rate": 1.4266396213657879e-06, |
|
"loss": 1.0072, |
|
"step": 1268 |
|
}, |
|
{ |
|
"epoch": 2.5740365111561867, |
|
"grad_norm": 0.2470703125, |
|
"learning_rate": 1.4198782961460448e-06, |
|
"loss": 1.0053, |
|
"step": 1269 |
|
}, |
|
{ |
|
"epoch": 2.5760649087221097, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 1.4131169709263017e-06, |
|
"loss": 1.029, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 2.5780933062880322, |
|
"grad_norm": 0.275390625, |
|
"learning_rate": 1.4063556457065586e-06, |
|
"loss": 1.0295, |
|
"step": 1271 |
|
}, |
|
{ |
|
"epoch": 2.5801217038539552, |
|
"grad_norm": 0.30078125, |
|
"learning_rate": 1.3995943204868157e-06, |
|
"loss": 1.0263, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 2.582150101419878, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 1.3928329952670724e-06, |
|
"loss": 0.997, |
|
"step": 1273 |
|
}, |
|
{ |
|
"epoch": 2.584178498985801, |
|
"grad_norm": 0.2578125, |
|
"learning_rate": 1.3860716700473295e-06, |
|
"loss": 1.0262, |
|
"step": 1274 |
|
}, |
|
{ |
|
"epoch": 2.586206896551724, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 1.3793103448275862e-06, |
|
"loss": 1.0284, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 2.588235294117647, |
|
"grad_norm": 0.263671875, |
|
"learning_rate": 1.3725490196078434e-06, |
|
"loss": 0.9981, |
|
"step": 1276 |
|
}, |
|
{ |
|
"epoch": 2.59026369168357, |
|
"grad_norm": 0.259765625, |
|
"learning_rate": 1.3657876943881e-06, |
|
"loss": 1.0424, |
|
"step": 1277 |
|
}, |
|
{ |
|
"epoch": 2.5922920892494927, |
|
"grad_norm": 0.28515625, |
|
"learning_rate": 1.3590263691683572e-06, |
|
"loss": 1.0289, |
|
"step": 1278 |
|
}, |
|
{ |
|
"epoch": 2.5943204868154157, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 1.352265043948614e-06, |
|
"loss": 1.0007, |
|
"step": 1279 |
|
}, |
|
{ |
|
"epoch": 2.5963488843813387, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 1.345503718728871e-06, |
|
"loss": 1.0017, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 2.5983772819472617, |
|
"grad_norm": 0.263671875, |
|
"learning_rate": 1.338742393509128e-06, |
|
"loss": 1.0152, |
|
"step": 1281 |
|
}, |
|
{ |
|
"epoch": 2.6004056795131847, |
|
"grad_norm": 0.37109375, |
|
"learning_rate": 1.331981068289385e-06, |
|
"loss": 1.0119, |
|
"step": 1282 |
|
}, |
|
{ |
|
"epoch": 2.6024340770791072, |
|
"grad_norm": 0.296875, |
|
"learning_rate": 1.3252197430696418e-06, |
|
"loss": 1.0538, |
|
"step": 1283 |
|
}, |
|
{ |
|
"epoch": 2.6044624746450307, |
|
"grad_norm": 0.267578125, |
|
"learning_rate": 1.3184584178498989e-06, |
|
"loss": 1.0105, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 2.606490872210953, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 1.3116970926301556e-06, |
|
"loss": 0.9894, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 2.608519269776876, |
|
"grad_norm": 0.3515625, |
|
"learning_rate": 1.3049357674104127e-06, |
|
"loss": 0.9832, |
|
"step": 1286 |
|
}, |
|
{ |
|
"epoch": 2.610547667342799, |
|
"grad_norm": 0.26171875, |
|
"learning_rate": 1.2981744421906694e-06, |
|
"loss": 1.0766, |
|
"step": 1287 |
|
}, |
|
{ |
|
"epoch": 2.612576064908722, |
|
"grad_norm": 0.2578125, |
|
"learning_rate": 1.2914131169709265e-06, |
|
"loss": 1.0027, |
|
"step": 1288 |
|
}, |
|
{ |
|
"epoch": 2.614604462474645, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 1.2846517917511832e-06, |
|
"loss": 1.0245, |
|
"step": 1289 |
|
}, |
|
{ |
|
"epoch": 2.6166328600405677, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 1.2778904665314403e-06, |
|
"loss": 1.065, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 2.6186612576064907, |
|
"grad_norm": 0.33203125, |
|
"learning_rate": 1.2711291413116973e-06, |
|
"loss": 1.0342, |
|
"step": 1291 |
|
}, |
|
{ |
|
"epoch": 2.6206896551724137, |
|
"grad_norm": 0.275390625, |
|
"learning_rate": 1.2643678160919542e-06, |
|
"loss": 1.045, |
|
"step": 1292 |
|
}, |
|
{ |
|
"epoch": 2.6227180527383367, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 1.257606490872211e-06, |
|
"loss": 1.0383, |
|
"step": 1293 |
|
}, |
|
{ |
|
"epoch": 2.6247464503042597, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 1.2508451656524682e-06, |
|
"loss": 0.9908, |
|
"step": 1294 |
|
}, |
|
{ |
|
"epoch": 2.6267748478701827, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 1.244083840432725e-06, |
|
"loss": 1.048, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 2.6288032454361057, |
|
"grad_norm": 0.271484375, |
|
"learning_rate": 1.2373225152129818e-06, |
|
"loss": 1.0501, |
|
"step": 1296 |
|
}, |
|
{ |
|
"epoch": 2.630831643002028, |
|
"grad_norm": 0.25, |
|
"learning_rate": 1.2305611899932387e-06, |
|
"loss": 0.9794, |
|
"step": 1297 |
|
}, |
|
{ |
|
"epoch": 2.632860040567951, |
|
"grad_norm": 0.28125, |
|
"learning_rate": 1.2237998647734956e-06, |
|
"loss": 0.9768, |
|
"step": 1298 |
|
}, |
|
{ |
|
"epoch": 2.634888438133874, |
|
"grad_norm": 0.248046875, |
|
"learning_rate": 1.2170385395537525e-06, |
|
"loss": 1.0291, |
|
"step": 1299 |
|
}, |
|
{ |
|
"epoch": 2.636916835699797, |
|
"grad_norm": 0.248046875, |
|
"learning_rate": 1.2102772143340095e-06, |
|
"loss": 1.0042, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.63894523326572, |
|
"grad_norm": 0.267578125, |
|
"learning_rate": 1.2035158891142666e-06, |
|
"loss": 1.049, |
|
"step": 1301 |
|
}, |
|
{ |
|
"epoch": 2.640973630831643, |
|
"grad_norm": 0.2451171875, |
|
"learning_rate": 1.1967545638945235e-06, |
|
"loss": 1.0256, |
|
"step": 1302 |
|
}, |
|
{ |
|
"epoch": 2.643002028397566, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 1.1899932386747804e-06, |
|
"loss": 0.9939, |
|
"step": 1303 |
|
}, |
|
{ |
|
"epoch": 2.6450304259634887, |
|
"grad_norm": 0.259765625, |
|
"learning_rate": 1.1832319134550373e-06, |
|
"loss": 1.0379, |
|
"step": 1304 |
|
}, |
|
{ |
|
"epoch": 2.6470588235294117, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 1.1764705882352942e-06, |
|
"loss": 1.0315, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 2.6490872210953347, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 1.1697092630155511e-06, |
|
"loss": 1.0205, |
|
"step": 1306 |
|
}, |
|
{ |
|
"epoch": 2.6511156186612577, |
|
"grad_norm": 0.251953125, |
|
"learning_rate": 1.162947937795808e-06, |
|
"loss": 1.0316, |
|
"step": 1307 |
|
}, |
|
{ |
|
"epoch": 2.6531440162271807, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 1.156186612576065e-06, |
|
"loss": 1.05, |
|
"step": 1308 |
|
}, |
|
{ |
|
"epoch": 2.655172413793103, |
|
"grad_norm": 0.248046875, |
|
"learning_rate": 1.1494252873563219e-06, |
|
"loss": 1.0154, |
|
"step": 1309 |
|
}, |
|
{ |
|
"epoch": 2.6572008113590266, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 1.1426639621365788e-06, |
|
"loss": 1.0074, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 2.659229208924949, |
|
"grad_norm": 0.275390625, |
|
"learning_rate": 1.1359026369168357e-06, |
|
"loss": 1.0492, |
|
"step": 1311 |
|
}, |
|
{ |
|
"epoch": 2.661257606490872, |
|
"grad_norm": 0.396484375, |
|
"learning_rate": 1.1291413116970926e-06, |
|
"loss": 1.0047, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 2.663286004056795, |
|
"grad_norm": 0.2734375, |
|
"learning_rate": 1.1223799864773497e-06, |
|
"loss": 1.0508, |
|
"step": 1313 |
|
}, |
|
{ |
|
"epoch": 2.665314401622718, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 1.1156186612576066e-06, |
|
"loss": 1.0369, |
|
"step": 1314 |
|
}, |
|
{ |
|
"epoch": 2.667342799188641, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 1.1088573360378636e-06, |
|
"loss": 1.0213, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 2.6693711967545637, |
|
"grad_norm": 0.2578125, |
|
"learning_rate": 1.1020960108181205e-06, |
|
"loss": 1.0402, |
|
"step": 1316 |
|
}, |
|
{ |
|
"epoch": 2.6713995943204867, |
|
"grad_norm": 0.494140625, |
|
"learning_rate": 1.0953346855983774e-06, |
|
"loss": 0.9848, |
|
"step": 1317 |
|
}, |
|
{ |
|
"epoch": 2.6734279918864097, |
|
"grad_norm": 0.296875, |
|
"learning_rate": 1.0885733603786343e-06, |
|
"loss": 1.0522, |
|
"step": 1318 |
|
}, |
|
{ |
|
"epoch": 2.6754563894523327, |
|
"grad_norm": 0.26171875, |
|
"learning_rate": 1.0818120351588912e-06, |
|
"loss": 1.0088, |
|
"step": 1319 |
|
}, |
|
{ |
|
"epoch": 2.6774847870182557, |
|
"grad_norm": 0.283203125, |
|
"learning_rate": 1.0750507099391481e-06, |
|
"loss": 1.0378, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 2.6795131845841786, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 1.068289384719405e-06, |
|
"loss": 1.0448, |
|
"step": 1321 |
|
}, |
|
{ |
|
"epoch": 2.6815415821501016, |
|
"grad_norm": 0.291015625, |
|
"learning_rate": 1.061528059499662e-06, |
|
"loss": 1.0244, |
|
"step": 1322 |
|
}, |
|
{ |
|
"epoch": 2.683569979716024, |
|
"grad_norm": 0.259765625, |
|
"learning_rate": 1.0547667342799188e-06, |
|
"loss": 1.0069, |
|
"step": 1323 |
|
}, |
|
{ |
|
"epoch": 2.685598377281947, |
|
"grad_norm": 0.267578125, |
|
"learning_rate": 1.048005409060176e-06, |
|
"loss": 1.042, |
|
"step": 1324 |
|
}, |
|
{ |
|
"epoch": 2.68762677484787, |
|
"grad_norm": 0.248046875, |
|
"learning_rate": 1.0412440838404329e-06, |
|
"loss": 1.0167, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 2.689655172413793, |
|
"grad_norm": 0.294921875, |
|
"learning_rate": 1.0344827586206898e-06, |
|
"loss": 1.0087, |
|
"step": 1326 |
|
}, |
|
{ |
|
"epoch": 2.691683569979716, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 1.0277214334009467e-06, |
|
"loss": 1.0285, |
|
"step": 1327 |
|
}, |
|
{ |
|
"epoch": 2.6937119675456387, |
|
"grad_norm": 0.26171875, |
|
"learning_rate": 1.0209601081812036e-06, |
|
"loss": 1.0014, |
|
"step": 1328 |
|
}, |
|
{ |
|
"epoch": 2.695740365111562, |
|
"grad_norm": 0.2451171875, |
|
"learning_rate": 1.0141987829614605e-06, |
|
"loss": 1.0103, |
|
"step": 1329 |
|
}, |
|
{ |
|
"epoch": 2.6977687626774847, |
|
"grad_norm": 0.25, |
|
"learning_rate": 1.0074374577417174e-06, |
|
"loss": 1.0248, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 2.6997971602434077, |
|
"grad_norm": 0.248046875, |
|
"learning_rate": 1.0006761325219743e-06, |
|
"loss": 1.0008, |
|
"step": 1331 |
|
}, |
|
{ |
|
"epoch": 2.7018255578093306, |
|
"grad_norm": 0.2490234375, |
|
"learning_rate": 9.939148073022313e-07, |
|
"loss": 0.9954, |
|
"step": 1332 |
|
}, |
|
{ |
|
"epoch": 2.7038539553752536, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 9.871534820824882e-07, |
|
"loss": 1.0192, |
|
"step": 1333 |
|
}, |
|
{ |
|
"epoch": 2.7058823529411766, |
|
"grad_norm": 0.2470703125, |
|
"learning_rate": 9.80392156862745e-07, |
|
"loss": 1.0073, |
|
"step": 1334 |
|
}, |
|
{ |
|
"epoch": 2.707910750507099, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 9.73630831643002e-07, |
|
"loss": 1.0504, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 2.709939148073022, |
|
"grad_norm": 0.37890625, |
|
"learning_rate": 9.668695064232591e-07, |
|
"loss": 1.0183, |
|
"step": 1336 |
|
}, |
|
{ |
|
"epoch": 2.711967545638945, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 9.60108181203516e-07, |
|
"loss": 1.0052, |
|
"step": 1337 |
|
}, |
|
{ |
|
"epoch": 2.713995943204868, |
|
"grad_norm": 0.25, |
|
"learning_rate": 9.533468559837728e-07, |
|
"loss": 1.0111, |
|
"step": 1338 |
|
}, |
|
{ |
|
"epoch": 2.716024340770791, |
|
"grad_norm": 0.244140625, |
|
"learning_rate": 9.465855307640299e-07, |
|
"loss": 1.0238, |
|
"step": 1339 |
|
}, |
|
{ |
|
"epoch": 2.718052738336714, |
|
"grad_norm": 0.265625, |
|
"learning_rate": 9.398242055442868e-07, |
|
"loss": 1.0333, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 2.720081135902637, |
|
"grad_norm": 0.2490234375, |
|
"learning_rate": 9.330628803245437e-07, |
|
"loss": 1.0292, |
|
"step": 1341 |
|
}, |
|
{ |
|
"epoch": 2.7221095334685597, |
|
"grad_norm": 0.244140625, |
|
"learning_rate": 9.263015551048006e-07, |
|
"loss": 0.9643, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 2.7241379310344827, |
|
"grad_norm": 0.251953125, |
|
"learning_rate": 9.195402298850575e-07, |
|
"loss": 1.0272, |
|
"step": 1343 |
|
}, |
|
{ |
|
"epoch": 2.7261663286004056, |
|
"grad_norm": 0.263671875, |
|
"learning_rate": 9.127789046653145e-07, |
|
"loss": 1.0355, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 2.7281947261663286, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 9.060175794455714e-07, |
|
"loss": 1.0653, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 2.7302231237322516, |
|
"grad_norm": 0.26171875, |
|
"learning_rate": 8.992562542258283e-07, |
|
"loss": 1.0541, |
|
"step": 1346 |
|
}, |
|
{ |
|
"epoch": 2.732251521298174, |
|
"grad_norm": 0.279296875, |
|
"learning_rate": 8.924949290060852e-07, |
|
"loss": 1.0, |
|
"step": 1347 |
|
}, |
|
{ |
|
"epoch": 2.7342799188640976, |
|
"grad_norm": 0.291015625, |
|
"learning_rate": 8.857336037863422e-07, |
|
"loss": 1.0285, |
|
"step": 1348 |
|
}, |
|
{ |
|
"epoch": 2.73630831643002, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 8.789722785665991e-07, |
|
"loss": 1.0156, |
|
"step": 1349 |
|
}, |
|
{ |
|
"epoch": 2.738336713995943, |
|
"grad_norm": 0.291015625, |
|
"learning_rate": 8.722109533468561e-07, |
|
"loss": 1.0228, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 2.740365111561866, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 8.65449628127113e-07, |
|
"loss": 1.0123, |
|
"step": 1351 |
|
}, |
|
{ |
|
"epoch": 2.742393509127789, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 8.586883029073699e-07, |
|
"loss": 1.0359, |
|
"step": 1352 |
|
}, |
|
{ |
|
"epoch": 2.744421906693712, |
|
"grad_norm": 0.291015625, |
|
"learning_rate": 8.519269776876268e-07, |
|
"loss": 1.0283, |
|
"step": 1353 |
|
}, |
|
{ |
|
"epoch": 2.7464503042596347, |
|
"grad_norm": 0.251953125, |
|
"learning_rate": 8.451656524678837e-07, |
|
"loss": 1.0165, |
|
"step": 1354 |
|
}, |
|
{ |
|
"epoch": 2.7484787018255576, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 8.384043272481406e-07, |
|
"loss": 1.0471, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 2.7505070993914806, |
|
"grad_norm": 0.25, |
|
"learning_rate": 8.316430020283977e-07, |
|
"loss": 1.037, |
|
"step": 1356 |
|
}, |
|
{ |
|
"epoch": 2.7525354969574036, |
|
"grad_norm": 0.357421875, |
|
"learning_rate": 8.248816768086546e-07, |
|
"loss": 1.0044, |
|
"step": 1357 |
|
}, |
|
{ |
|
"epoch": 2.7545638945233266, |
|
"grad_norm": 0.259765625, |
|
"learning_rate": 8.181203515889115e-07, |
|
"loss": 1.0651, |
|
"step": 1358 |
|
}, |
|
{ |
|
"epoch": 2.7565922920892496, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 8.113590263691684e-07, |
|
"loss": 1.0226, |
|
"step": 1359 |
|
}, |
|
{ |
|
"epoch": 2.7586206896551726, |
|
"grad_norm": 0.35546875, |
|
"learning_rate": 8.045977011494253e-07, |
|
"loss": 1.0059, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 2.760649087221095, |
|
"grad_norm": 0.251953125, |
|
"learning_rate": 7.978363759296822e-07, |
|
"loss": 0.9958, |
|
"step": 1361 |
|
}, |
|
{ |
|
"epoch": 2.762677484787018, |
|
"grad_norm": 0.251953125, |
|
"learning_rate": 7.910750507099392e-07, |
|
"loss": 1.0321, |
|
"step": 1362 |
|
}, |
|
{ |
|
"epoch": 2.764705882352941, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 7.843137254901962e-07, |
|
"loss": 1.0443, |
|
"step": 1363 |
|
}, |
|
{ |
|
"epoch": 2.766734279918864, |
|
"grad_norm": 0.2578125, |
|
"learning_rate": 7.775524002704531e-07, |
|
"loss": 1.0088, |
|
"step": 1364 |
|
}, |
|
{ |
|
"epoch": 2.768762677484787, |
|
"grad_norm": 0.251953125, |
|
"learning_rate": 7.7079107505071e-07, |
|
"loss": 1.0268, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 2.77079107505071, |
|
"grad_norm": 0.26171875, |
|
"learning_rate": 7.640297498309669e-07, |
|
"loss": 1.0209, |
|
"step": 1366 |
|
}, |
|
{ |
|
"epoch": 2.772819472616633, |
|
"grad_norm": 0.287109375, |
|
"learning_rate": 7.572684246112239e-07, |
|
"loss": 0.9831, |
|
"step": 1367 |
|
}, |
|
{ |
|
"epoch": 2.7748478701825556, |
|
"grad_norm": 0.275390625, |
|
"learning_rate": 7.505070993914808e-07, |
|
"loss": 0.9727, |
|
"step": 1368 |
|
}, |
|
{ |
|
"epoch": 2.7768762677484786, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 7.437457741717377e-07, |
|
"loss": 1.012, |
|
"step": 1369 |
|
}, |
|
{ |
|
"epoch": 2.7789046653144016, |
|
"grad_norm": 0.27734375, |
|
"learning_rate": 7.369844489519946e-07, |
|
"loss": 1.0417, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 2.7809330628803246, |
|
"grad_norm": 0.25, |
|
"learning_rate": 7.302231237322515e-07, |
|
"loss": 0.9739, |
|
"step": 1371 |
|
}, |
|
{ |
|
"epoch": 2.7829614604462476, |
|
"grad_norm": 0.2490234375, |
|
"learning_rate": 7.234617985125085e-07, |
|
"loss": 1.0208, |
|
"step": 1372 |
|
}, |
|
{ |
|
"epoch": 2.78498985801217, |
|
"grad_norm": 0.26953125, |
|
"learning_rate": 7.167004732927655e-07, |
|
"loss": 1.0333, |
|
"step": 1373 |
|
}, |
|
{ |
|
"epoch": 2.7870182555780936, |
|
"grad_norm": 0.2578125, |
|
"learning_rate": 7.099391480730224e-07, |
|
"loss": 0.9805, |
|
"step": 1374 |
|
}, |
|
{ |
|
"epoch": 2.789046653144016, |
|
"grad_norm": 0.26171875, |
|
"learning_rate": 7.031778228532793e-07, |
|
"loss": 1.0046, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 2.791075050709939, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 6.964164976335362e-07, |
|
"loss": 1.0241, |
|
"step": 1376 |
|
}, |
|
{ |
|
"epoch": 2.793103448275862, |
|
"grad_norm": 0.25, |
|
"learning_rate": 6.896551724137931e-07, |
|
"loss": 1.0465, |
|
"step": 1377 |
|
}, |
|
{ |
|
"epoch": 2.795131845841785, |
|
"grad_norm": 0.25, |
|
"learning_rate": 6.8289384719405e-07, |
|
"loss": 0.9962, |
|
"step": 1378 |
|
}, |
|
{ |
|
"epoch": 2.797160243407708, |
|
"grad_norm": 0.25, |
|
"learning_rate": 6.76132521974307e-07, |
|
"loss": 1.0428, |
|
"step": 1379 |
|
}, |
|
{ |
|
"epoch": 2.7991886409736306, |
|
"grad_norm": 0.2451171875, |
|
"learning_rate": 6.69371196754564e-07, |
|
"loss": 0.9963, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 2.8012170385395536, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 6.626098715348209e-07, |
|
"loss": 0.9982, |
|
"step": 1381 |
|
}, |
|
{ |
|
"epoch": 2.8032454361054766, |
|
"grad_norm": 0.28125, |
|
"learning_rate": 6.558485463150778e-07, |
|
"loss": 1.038, |
|
"step": 1382 |
|
}, |
|
{ |
|
"epoch": 2.8052738336713996, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 6.490872210953347e-07, |
|
"loss": 1.0555, |
|
"step": 1383 |
|
}, |
|
{ |
|
"epoch": 2.8073022312373226, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 6.423258958755916e-07, |
|
"loss": 1.0506, |
|
"step": 1384 |
|
}, |
|
{ |
|
"epoch": 2.8093306288032456, |
|
"grad_norm": 0.2578125, |
|
"learning_rate": 6.355645706558486e-07, |
|
"loss": 0.986, |
|
"step": 1385 |
|
}, |
|
{ |
|
"epoch": 2.8113590263691686, |
|
"grad_norm": 0.251953125, |
|
"learning_rate": 6.288032454361055e-07, |
|
"loss": 1.0352, |
|
"step": 1386 |
|
}, |
|
{ |
|
"epoch": 2.813387423935091, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 6.220419202163624e-07, |
|
"loss": 1.0198, |
|
"step": 1387 |
|
}, |
|
{ |
|
"epoch": 2.815415821501014, |
|
"grad_norm": 0.263671875, |
|
"learning_rate": 6.152805949966194e-07, |
|
"loss": 1.0464, |
|
"step": 1388 |
|
}, |
|
{ |
|
"epoch": 2.817444219066937, |
|
"grad_norm": 0.2470703125, |
|
"learning_rate": 6.085192697768763e-07, |
|
"loss": 1.029, |
|
"step": 1389 |
|
}, |
|
{ |
|
"epoch": 2.81947261663286, |
|
"grad_norm": 0.26171875, |
|
"learning_rate": 6.017579445571333e-07, |
|
"loss": 1.0276, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 2.821501014198783, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 5.949966193373902e-07, |
|
"loss": 1.0087, |
|
"step": 1391 |
|
}, |
|
{ |
|
"epoch": 2.8235294117647056, |
|
"grad_norm": 0.265625, |
|
"learning_rate": 5.882352941176471e-07, |
|
"loss": 0.9787, |
|
"step": 1392 |
|
}, |
|
{ |
|
"epoch": 2.825557809330629, |
|
"grad_norm": 0.283203125, |
|
"learning_rate": 5.81473968897904e-07, |
|
"loss": 0.9883, |
|
"step": 1393 |
|
}, |
|
{ |
|
"epoch": 2.8275862068965516, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 5.747126436781609e-07, |
|
"loss": 1.0263, |
|
"step": 1394 |
|
}, |
|
{ |
|
"epoch": 2.8296146044624746, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 5.679513184584178e-07, |
|
"loss": 1.0101, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 2.8316430020283976, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 5.611899932386749e-07, |
|
"loss": 1.0315, |
|
"step": 1396 |
|
}, |
|
{ |
|
"epoch": 2.8336713995943206, |
|
"grad_norm": 0.2578125, |
|
"learning_rate": 5.544286680189318e-07, |
|
"loss": 1.0142, |
|
"step": 1397 |
|
}, |
|
{ |
|
"epoch": 2.8356997971602436, |
|
"grad_norm": 0.251953125, |
|
"learning_rate": 5.476673427991887e-07, |
|
"loss": 1.007, |
|
"step": 1398 |
|
}, |
|
{ |
|
"epoch": 2.837728194726166, |
|
"grad_norm": 0.26953125, |
|
"learning_rate": 5.409060175794456e-07, |
|
"loss": 1.0391, |
|
"step": 1399 |
|
}, |
|
{ |
|
"epoch": 2.839756592292089, |
|
"grad_norm": 0.2578125, |
|
"learning_rate": 5.341446923597025e-07, |
|
"loss": 1.0453, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.841784989858012, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 5.273833671399594e-07, |
|
"loss": 1.0113, |
|
"step": 1401 |
|
}, |
|
{ |
|
"epoch": 2.843813387423935, |
|
"grad_norm": 0.251953125, |
|
"learning_rate": 5.206220419202164e-07, |
|
"loss": 1.026, |
|
"step": 1402 |
|
}, |
|
{ |
|
"epoch": 2.845841784989858, |
|
"grad_norm": 0.251953125, |
|
"learning_rate": 5.138607167004734e-07, |
|
"loss": 1.0115, |
|
"step": 1403 |
|
}, |
|
{ |
|
"epoch": 2.847870182555781, |
|
"grad_norm": 0.27734375, |
|
"learning_rate": 5.070993914807303e-07, |
|
"loss": 1.0217, |
|
"step": 1404 |
|
}, |
|
{ |
|
"epoch": 2.849898580121704, |
|
"grad_norm": 0.2578125, |
|
"learning_rate": 5.003380662609872e-07, |
|
"loss": 1.0084, |
|
"step": 1405 |
|
}, |
|
{ |
|
"epoch": 2.8519269776876266, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 4.935767410412441e-07, |
|
"loss": 0.988, |
|
"step": 1406 |
|
}, |
|
{ |
|
"epoch": 2.8539553752535496, |
|
"grad_norm": 0.291015625, |
|
"learning_rate": 4.86815415821501e-07, |
|
"loss": 1.0294, |
|
"step": 1407 |
|
}, |
|
{ |
|
"epoch": 2.8559837728194726, |
|
"grad_norm": 0.2451171875, |
|
"learning_rate": 4.80054090601758e-07, |
|
"loss": 1.0212, |
|
"step": 1408 |
|
}, |
|
{ |
|
"epoch": 2.8580121703853956, |
|
"grad_norm": 0.259765625, |
|
"learning_rate": 4.732927653820149e-07, |
|
"loss": 1.043, |
|
"step": 1409 |
|
}, |
|
{ |
|
"epoch": 2.8600405679513186, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 4.6653144016227184e-07, |
|
"loss": 0.9945, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 2.862068965517241, |
|
"grad_norm": 0.291015625, |
|
"learning_rate": 4.5977011494252875e-07, |
|
"loss": 0.9841, |
|
"step": 1411 |
|
}, |
|
{ |
|
"epoch": 2.8640973630831645, |
|
"grad_norm": 0.2490234375, |
|
"learning_rate": 4.530087897227857e-07, |
|
"loss": 1.0136, |
|
"step": 1412 |
|
}, |
|
{ |
|
"epoch": 2.866125760649087, |
|
"grad_norm": 0.3515625, |
|
"learning_rate": 4.462474645030426e-07, |
|
"loss": 1.0142, |
|
"step": 1413 |
|
}, |
|
{ |
|
"epoch": 2.86815415821501, |
|
"grad_norm": 0.271484375, |
|
"learning_rate": 4.3948613928329954e-07, |
|
"loss": 1.022, |
|
"step": 1414 |
|
}, |
|
{ |
|
"epoch": 2.870182555780933, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 4.327248140635565e-07, |
|
"loss": 1.0315, |
|
"step": 1415 |
|
}, |
|
{ |
|
"epoch": 2.872210953346856, |
|
"grad_norm": 0.2451171875, |
|
"learning_rate": 4.259634888438134e-07, |
|
"loss": 1.0136, |
|
"step": 1416 |
|
}, |
|
{ |
|
"epoch": 2.874239350912779, |
|
"grad_norm": 0.2451171875, |
|
"learning_rate": 4.192021636240703e-07, |
|
"loss": 1.006, |
|
"step": 1417 |
|
}, |
|
{ |
|
"epoch": 2.8762677484787016, |
|
"grad_norm": 0.2470703125, |
|
"learning_rate": 4.124408384043273e-07, |
|
"loss": 0.9906, |
|
"step": 1418 |
|
}, |
|
{ |
|
"epoch": 2.8782961460446246, |
|
"grad_norm": 0.2734375, |
|
"learning_rate": 4.056795131845842e-07, |
|
"loss": 1.0487, |
|
"step": 1419 |
|
}, |
|
{ |
|
"epoch": 2.8803245436105476, |
|
"grad_norm": 0.26171875, |
|
"learning_rate": 3.989181879648411e-07, |
|
"loss": 1.0244, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 2.8823529411764706, |
|
"grad_norm": 0.296875, |
|
"learning_rate": 3.921568627450981e-07, |
|
"loss": 1.0045, |
|
"step": 1421 |
|
}, |
|
{ |
|
"epoch": 2.8843813387423936, |
|
"grad_norm": 0.259765625, |
|
"learning_rate": 3.85395537525355e-07, |
|
"loss": 1.0387, |
|
"step": 1422 |
|
}, |
|
{ |
|
"epoch": 2.8864097363083165, |
|
"grad_norm": 0.265625, |
|
"learning_rate": 3.7863421230561195e-07, |
|
"loss": 1.0187, |
|
"step": 1423 |
|
}, |
|
{ |
|
"epoch": 2.8884381338742395, |
|
"grad_norm": 0.287109375, |
|
"learning_rate": 3.7187288708586886e-07, |
|
"loss": 0.9929, |
|
"step": 1424 |
|
}, |
|
{ |
|
"epoch": 2.890466531440162, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 3.651115618661258e-07, |
|
"loss": 1.013, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 2.892494929006085, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 3.5835023664638274e-07, |
|
"loss": 0.9687, |
|
"step": 1426 |
|
}, |
|
{ |
|
"epoch": 2.894523326572008, |
|
"grad_norm": 0.267578125, |
|
"learning_rate": 3.5158891142663965e-07, |
|
"loss": 1.0375, |
|
"step": 1427 |
|
}, |
|
{ |
|
"epoch": 2.896551724137931, |
|
"grad_norm": 0.26171875, |
|
"learning_rate": 3.4482758620689656e-07, |
|
"loss": 1.0068, |
|
"step": 1428 |
|
}, |
|
{ |
|
"epoch": 2.898580121703854, |
|
"grad_norm": 0.248046875, |
|
"learning_rate": 3.380662609871535e-07, |
|
"loss": 1.0095, |
|
"step": 1429 |
|
}, |
|
{ |
|
"epoch": 2.900608519269777, |
|
"grad_norm": 0.25, |
|
"learning_rate": 3.3130493576741044e-07, |
|
"loss": 1.0516, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 2.9026369168357, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 3.2454361054766735e-07, |
|
"loss": 0.9735, |
|
"step": 1431 |
|
}, |
|
{ |
|
"epoch": 2.9046653144016226, |
|
"grad_norm": 0.24609375, |
|
"learning_rate": 3.177822853279243e-07, |
|
"loss": 0.9895, |
|
"step": 1432 |
|
}, |
|
{ |
|
"epoch": 2.9066937119675456, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 3.110209601081812e-07, |
|
"loss": 1.0294, |
|
"step": 1433 |
|
}, |
|
{ |
|
"epoch": 2.9087221095334685, |
|
"grad_norm": 0.2470703125, |
|
"learning_rate": 3.0425963488843814e-07, |
|
"loss": 0.9815, |
|
"step": 1434 |
|
}, |
|
{ |
|
"epoch": 2.9107505070993915, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 2.974983096686951e-07, |
|
"loss": 1.0464, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 2.9127789046653145, |
|
"grad_norm": 0.296875, |
|
"learning_rate": 2.90736984448952e-07, |
|
"loss": 0.9971, |
|
"step": 1436 |
|
}, |
|
{ |
|
"epoch": 2.914807302231237, |
|
"grad_norm": 0.265625, |
|
"learning_rate": 2.839756592292089e-07, |
|
"loss": 1.0677, |
|
"step": 1437 |
|
}, |
|
{ |
|
"epoch": 2.9168356997971605, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 2.772143340094659e-07, |
|
"loss": 1.0104, |
|
"step": 1438 |
|
}, |
|
{ |
|
"epoch": 2.918864097363083, |
|
"grad_norm": 0.283203125, |
|
"learning_rate": 2.704530087897228e-07, |
|
"loss": 1.011, |
|
"step": 1439 |
|
}, |
|
{ |
|
"epoch": 2.920892494929006, |
|
"grad_norm": 0.2890625, |
|
"learning_rate": 2.636916835699797e-07, |
|
"loss": 0.9893, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 2.922920892494929, |
|
"grad_norm": 0.2490234375, |
|
"learning_rate": 2.569303583502367e-07, |
|
"loss": 1.0341, |
|
"step": 1441 |
|
}, |
|
{ |
|
"epoch": 2.924949290060852, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 2.501690331304936e-07, |
|
"loss": 1.0429, |
|
"step": 1442 |
|
}, |
|
{ |
|
"epoch": 2.926977687626775, |
|
"grad_norm": 0.25, |
|
"learning_rate": 2.434077079107505e-07, |
|
"loss": 0.9993, |
|
"step": 1443 |
|
}, |
|
{ |
|
"epoch": 2.9290060851926976, |
|
"grad_norm": 0.279296875, |
|
"learning_rate": 2.3664638269100746e-07, |
|
"loss": 1.0904, |
|
"step": 1444 |
|
}, |
|
{ |
|
"epoch": 2.9310344827586206, |
|
"grad_norm": 0.2578125, |
|
"learning_rate": 2.2988505747126437e-07, |
|
"loss": 1.0504, |
|
"step": 1445 |
|
}, |
|
{ |
|
"epoch": 2.9330628803245435, |
|
"grad_norm": 0.25, |
|
"learning_rate": 2.231237322515213e-07, |
|
"loss": 0.9868, |
|
"step": 1446 |
|
}, |
|
{ |
|
"epoch": 2.9350912778904665, |
|
"grad_norm": 0.2470703125, |
|
"learning_rate": 2.1636240703177825e-07, |
|
"loss": 1.0201, |
|
"step": 1447 |
|
}, |
|
{ |
|
"epoch": 2.9371196754563895, |
|
"grad_norm": 0.25, |
|
"learning_rate": 2.0960108181203516e-07, |
|
"loss": 1.068, |
|
"step": 1448 |
|
}, |
|
{ |
|
"epoch": 2.9391480730223125, |
|
"grad_norm": 0.2734375, |
|
"learning_rate": 2.028397565922921e-07, |
|
"loss": 1.0115, |
|
"step": 1449 |
|
}, |
|
{ |
|
"epoch": 2.9411764705882355, |
|
"grad_norm": 0.2451171875, |
|
"learning_rate": 1.9607843137254904e-07, |
|
"loss": 0.9892, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 2.943204868154158, |
|
"grad_norm": 0.68359375, |
|
"learning_rate": 1.8931710615280598e-07, |
|
"loss": 1.0336, |
|
"step": 1451 |
|
}, |
|
{ |
|
"epoch": 2.945233265720081, |
|
"grad_norm": 0.26953125, |
|
"learning_rate": 1.825557809330629e-07, |
|
"loss": 1.0369, |
|
"step": 1452 |
|
}, |
|
{ |
|
"epoch": 2.947261663286004, |
|
"grad_norm": 0.251953125, |
|
"learning_rate": 1.7579445571331983e-07, |
|
"loss": 0.9911, |
|
"step": 1453 |
|
}, |
|
{ |
|
"epoch": 2.949290060851927, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 1.6903313049357676e-07, |
|
"loss": 1.0183, |
|
"step": 1454 |
|
}, |
|
{ |
|
"epoch": 2.95131845841785, |
|
"grad_norm": 0.259765625, |
|
"learning_rate": 1.6227180527383367e-07, |
|
"loss": 1.0408, |
|
"step": 1455 |
|
}, |
|
{ |
|
"epoch": 2.9533468559837726, |
|
"grad_norm": 0.369140625, |
|
"learning_rate": 1.555104800540906e-07, |
|
"loss": 1.0125, |
|
"step": 1456 |
|
}, |
|
{ |
|
"epoch": 2.955375253549696, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 1.4874915483434755e-07, |
|
"loss": 1.0095, |
|
"step": 1457 |
|
}, |
|
{ |
|
"epoch": 2.9574036511156185, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 1.4198782961460446e-07, |
|
"loss": 1.0189, |
|
"step": 1458 |
|
}, |
|
{ |
|
"epoch": 2.9594320486815415, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 1.352265043948614e-07, |
|
"loss": 1.0923, |
|
"step": 1459 |
|
}, |
|
{ |
|
"epoch": 2.9614604462474645, |
|
"grad_norm": 0.2490234375, |
|
"learning_rate": 1.2846517917511834e-07, |
|
"loss": 1.011, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 2.9634888438133875, |
|
"grad_norm": 0.37109375, |
|
"learning_rate": 1.2170385395537525e-07, |
|
"loss": 0.9992, |
|
"step": 1461 |
|
}, |
|
{ |
|
"epoch": 2.9655172413793105, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 1.1494252873563219e-07, |
|
"loss": 1.0249, |
|
"step": 1462 |
|
}, |
|
{ |
|
"epoch": 2.967545638945233, |
|
"grad_norm": 0.2578125, |
|
"learning_rate": 1.0818120351588913e-07, |
|
"loss": 1.0594, |
|
"step": 1463 |
|
}, |
|
{ |
|
"epoch": 2.969574036511156, |
|
"grad_norm": 0.2734375, |
|
"learning_rate": 1.0141987829614605e-07, |
|
"loss": 1.0559, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 2.971602434077079, |
|
"grad_norm": 0.271484375, |
|
"learning_rate": 9.465855307640299e-08, |
|
"loss": 1.0337, |
|
"step": 1465 |
|
}, |
|
{ |
|
"epoch": 2.973630831643002, |
|
"grad_norm": 0.263671875, |
|
"learning_rate": 8.789722785665991e-08, |
|
"loss": 1.0693, |
|
"step": 1466 |
|
}, |
|
{ |
|
"epoch": 2.975659229208925, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 8.113590263691684e-08, |
|
"loss": 1.0364, |
|
"step": 1467 |
|
}, |
|
{ |
|
"epoch": 2.977687626774848, |
|
"grad_norm": 0.251953125, |
|
"learning_rate": 7.437457741717378e-08, |
|
"loss": 1.0163, |
|
"step": 1468 |
|
}, |
|
{ |
|
"epoch": 2.979716024340771, |
|
"grad_norm": 0.265625, |
|
"learning_rate": 6.76132521974307e-08, |
|
"loss": 0.9708, |
|
"step": 1469 |
|
}, |
|
{ |
|
"epoch": 2.9817444219066935, |
|
"grad_norm": 0.251953125, |
|
"learning_rate": 6.085192697768762e-08, |
|
"loss": 1.016, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 2.9837728194726165, |
|
"grad_norm": 0.251953125, |
|
"learning_rate": 5.409060175794456e-08, |
|
"loss": 1.0521, |
|
"step": 1471 |
|
}, |
|
{ |
|
"epoch": 2.9858012170385395, |
|
"grad_norm": 0.298828125, |
|
"learning_rate": 4.7329276538201494e-08, |
|
"loss": 1.0276, |
|
"step": 1472 |
|
}, |
|
{ |
|
"epoch": 2.9878296146044625, |
|
"grad_norm": 0.26171875, |
|
"learning_rate": 4.056795131845842e-08, |
|
"loss": 1.0053, |
|
"step": 1473 |
|
}, |
|
{ |
|
"epoch": 2.9898580121703855, |
|
"grad_norm": 0.27734375, |
|
"learning_rate": 3.380662609871535e-08, |
|
"loss": 0.9943, |
|
"step": 1474 |
|
}, |
|
{ |
|
"epoch": 2.991886409736308, |
|
"grad_norm": 0.265625, |
|
"learning_rate": 2.704530087897228e-08, |
|
"loss": 1.0071, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 2.9939148073022315, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 2.028397565922921e-08, |
|
"loss": 1.0272, |
|
"step": 1476 |
|
}, |
|
{ |
|
"epoch": 2.995943204868154, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 1.352265043948614e-08, |
|
"loss": 1.0101, |
|
"step": 1477 |
|
}, |
|
{ |
|
"epoch": 2.997971602434077, |
|
"grad_norm": 0.251953125, |
|
"learning_rate": 6.76132521974307e-09, |
|
"loss": 1.0342, |
|
"step": 1478 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 0.259765625, |
|
"learning_rate": 0.0, |
|
"loss": 1.0294, |
|
"step": 1479 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 1479, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 0, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.72557905413931e+18, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|