|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.6865774116031582, |
|
"eval_steps": 500, |
|
"global_step": 1000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0006865774116031583, |
|
"grad_norm": 0.48046875, |
|
"learning_rate": 1.1363636363636364e-07, |
|
"loss": 0.3846, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0013731548232063166, |
|
"grad_norm": 0.458984375, |
|
"learning_rate": 2.2727272727272729e-07, |
|
"loss": 0.3841, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0020597322348094747, |
|
"grad_norm": 0.4921875, |
|
"learning_rate": 3.409090909090909e-07, |
|
"loss": 0.3765, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0027463096464126332, |
|
"grad_norm": 0.484375, |
|
"learning_rate": 4.5454545454545457e-07, |
|
"loss": 0.3732, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0034328870580157913, |
|
"grad_norm": 0.55859375, |
|
"learning_rate": 5.681818181818182e-07, |
|
"loss": 0.3724, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.004119464469618949, |
|
"grad_norm": 0.5625, |
|
"learning_rate": 6.818181818181818e-07, |
|
"loss": 0.3947, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.004806041881222108, |
|
"grad_norm": 0.5625, |
|
"learning_rate": 7.954545454545455e-07, |
|
"loss": 0.3758, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.0054926192928252664, |
|
"grad_norm": 0.5703125, |
|
"learning_rate": 9.090909090909091e-07, |
|
"loss": 0.3984, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.006179196704428424, |
|
"grad_norm": 0.57421875, |
|
"learning_rate": 1.0227272727272729e-06, |
|
"loss": 0.3905, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.006865774116031583, |
|
"grad_norm": 0.51953125, |
|
"learning_rate": 1.1363636363636364e-06, |
|
"loss": 0.3914, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.007552351527634741, |
|
"grad_norm": 0.59765625, |
|
"learning_rate": 1.25e-06, |
|
"loss": 0.3939, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.008238928939237899, |
|
"grad_norm": 0.515625, |
|
"learning_rate": 1.3636363636363636e-06, |
|
"loss": 0.3767, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.008925506350841057, |
|
"grad_norm": 0.54296875, |
|
"learning_rate": 1.4772727272727275e-06, |
|
"loss": 0.3718, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.009612083762444216, |
|
"grad_norm": 0.59375, |
|
"learning_rate": 1.590909090909091e-06, |
|
"loss": 0.3907, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.010298661174047374, |
|
"grad_norm": 0.478515625, |
|
"learning_rate": 1.7045454545454546e-06, |
|
"loss": 0.3783, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.010985238585650533, |
|
"grad_norm": 0.5234375, |
|
"learning_rate": 1.8181818181818183e-06, |
|
"loss": 0.372, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.01167181599725369, |
|
"grad_norm": 0.5078125, |
|
"learning_rate": 1.931818181818182e-06, |
|
"loss": 0.3888, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.012358393408856848, |
|
"grad_norm": 0.48046875, |
|
"learning_rate": 2.0454545454545457e-06, |
|
"loss": 0.3647, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.013044970820460007, |
|
"grad_norm": 0.470703125, |
|
"learning_rate": 2.1590909090909092e-06, |
|
"loss": 0.3687, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.013731548232063165, |
|
"grad_norm": 0.431640625, |
|
"learning_rate": 2.2727272727272728e-06, |
|
"loss": 0.3797, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.014418125643666324, |
|
"grad_norm": 0.470703125, |
|
"learning_rate": 2.3863636363636367e-06, |
|
"loss": 0.3753, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.015104703055269482, |
|
"grad_norm": 0.3984375, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.366, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.01579128046687264, |
|
"grad_norm": 0.55078125, |
|
"learning_rate": 2.6136363636363637e-06, |
|
"loss": 0.3711, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.016477857878475798, |
|
"grad_norm": 0.46484375, |
|
"learning_rate": 2.7272727272727272e-06, |
|
"loss": 0.3709, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.017164435290078956, |
|
"grad_norm": 0.396484375, |
|
"learning_rate": 2.8409090909090916e-06, |
|
"loss": 0.3615, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.017851012701682115, |
|
"grad_norm": 0.375, |
|
"learning_rate": 2.954545454545455e-06, |
|
"loss": 0.3467, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.018537590113285273, |
|
"grad_norm": 0.484375, |
|
"learning_rate": 3.0681818181818186e-06, |
|
"loss": 0.3494, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.01922416752488843, |
|
"grad_norm": 0.3828125, |
|
"learning_rate": 3.181818181818182e-06, |
|
"loss": 0.3552, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.01991074493649159, |
|
"grad_norm": 0.39453125, |
|
"learning_rate": 3.2954545454545456e-06, |
|
"loss": 0.3394, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.02059732234809475, |
|
"grad_norm": 0.34375, |
|
"learning_rate": 3.409090909090909e-06, |
|
"loss": 0.335, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.021283899759697907, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 3.522727272727273e-06, |
|
"loss": 0.347, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.021970477171301066, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 3.6363636363636366e-06, |
|
"loss": 0.3467, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.02265705458290422, |
|
"grad_norm": 0.29296875, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"loss": 0.3411, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.02334363199450738, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 3.863636363636364e-06, |
|
"loss": 0.3373, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.024030209406110538, |
|
"grad_norm": 0.2734375, |
|
"learning_rate": 3.9772727272727275e-06, |
|
"loss": 0.3408, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.024716786817713696, |
|
"grad_norm": 0.265625, |
|
"learning_rate": 4.0909090909090915e-06, |
|
"loss": 0.3404, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.025403364229316855, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 4.204545454545455e-06, |
|
"loss": 0.346, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.026089941640920013, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 4.3181818181818185e-06, |
|
"loss": 0.3428, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.026776519052523172, |
|
"grad_norm": 0.2373046875, |
|
"learning_rate": 4.4318181818181824e-06, |
|
"loss": 0.3418, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.02746309646412633, |
|
"grad_norm": 0.259765625, |
|
"learning_rate": 4.5454545454545455e-06, |
|
"loss": 0.339, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.02814967387572949, |
|
"grad_norm": 0.2373046875, |
|
"learning_rate": 4.6590909090909095e-06, |
|
"loss": 0.3137, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.028836251287332648, |
|
"grad_norm": 0.2265625, |
|
"learning_rate": 4.772727272727273e-06, |
|
"loss": 0.3213, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.029522828698935806, |
|
"grad_norm": 0.220703125, |
|
"learning_rate": 4.8863636363636365e-06, |
|
"loss": 0.3121, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.030209406110538965, |
|
"grad_norm": 0.24609375, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3153, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.030895983522142123, |
|
"grad_norm": 0.2412109375, |
|
"learning_rate": 5.113636363636364e-06, |
|
"loss": 0.3107, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.03158256093374528, |
|
"grad_norm": 0.2021484375, |
|
"learning_rate": 5.2272727272727274e-06, |
|
"loss": 0.2963, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.03226913834534844, |
|
"grad_norm": 0.2236328125, |
|
"learning_rate": 5.340909090909091e-06, |
|
"loss": 0.2936, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.032955715756951595, |
|
"grad_norm": 0.2119140625, |
|
"learning_rate": 5.4545454545454545e-06, |
|
"loss": 0.2909, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.03364229316855476, |
|
"grad_norm": 0.193359375, |
|
"learning_rate": 5.568181818181818e-06, |
|
"loss": 0.2891, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.03432887058015791, |
|
"grad_norm": 0.1875, |
|
"learning_rate": 5.681818181818183e-06, |
|
"loss": 0.3015, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.035015447991761074, |
|
"grad_norm": 0.1796875, |
|
"learning_rate": 5.795454545454546e-06, |
|
"loss": 0.2934, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.03570202540336423, |
|
"grad_norm": 0.1689453125, |
|
"learning_rate": 5.90909090909091e-06, |
|
"loss": 0.2977, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.036388602814967384, |
|
"grad_norm": 0.1669921875, |
|
"learning_rate": 6.022727272727273e-06, |
|
"loss": 0.2956, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.037075180226570546, |
|
"grad_norm": 0.1708984375, |
|
"learning_rate": 6.136363636363637e-06, |
|
"loss": 0.2857, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.0377617576381737, |
|
"grad_norm": 0.16015625, |
|
"learning_rate": 6.25e-06, |
|
"loss": 0.2854, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.03844833504977686, |
|
"grad_norm": 0.16015625, |
|
"learning_rate": 6.363636363636364e-06, |
|
"loss": 0.2727, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.03913491246138002, |
|
"grad_norm": 0.15625, |
|
"learning_rate": 6.477272727272727e-06, |
|
"loss": 0.2857, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.03982148987298318, |
|
"grad_norm": 0.146484375, |
|
"learning_rate": 6.590909090909091e-06, |
|
"loss": 0.2739, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.040508067284586335, |
|
"grad_norm": 0.1484375, |
|
"learning_rate": 6.704545454545454e-06, |
|
"loss": 0.276, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.0411946446961895, |
|
"grad_norm": 0.14453125, |
|
"learning_rate": 6.818181818181818e-06, |
|
"loss": 0.2705, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04188122210779265, |
|
"grad_norm": 0.1396484375, |
|
"learning_rate": 6.931818181818183e-06, |
|
"loss": 0.2786, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.042567799519395814, |
|
"grad_norm": 0.1435546875, |
|
"learning_rate": 7.045454545454546e-06, |
|
"loss": 0.2739, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.04325437693099897, |
|
"grad_norm": 0.1416015625, |
|
"learning_rate": 7.15909090909091e-06, |
|
"loss": 0.2608, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.04394095434260213, |
|
"grad_norm": 0.154296875, |
|
"learning_rate": 7.272727272727273e-06, |
|
"loss": 0.2702, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.04462753175420529, |
|
"grad_norm": 0.1435546875, |
|
"learning_rate": 7.386363636363637e-06, |
|
"loss": 0.2533, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.04531410916580844, |
|
"grad_norm": 0.1396484375, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 0.2546, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.046000686577411604, |
|
"grad_norm": 0.1455078125, |
|
"learning_rate": 7.613636363636364e-06, |
|
"loss": 0.2556, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.04668726398901476, |
|
"grad_norm": 0.14453125, |
|
"learning_rate": 7.727272727272727e-06, |
|
"loss": 0.2565, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.04737384140061792, |
|
"grad_norm": 0.1396484375, |
|
"learning_rate": 7.840909090909091e-06, |
|
"loss": 0.2564, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.048060418812221076, |
|
"grad_norm": 0.1376953125, |
|
"learning_rate": 7.954545454545455e-06, |
|
"loss": 0.251, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.04874699622382424, |
|
"grad_norm": 0.15625, |
|
"learning_rate": 8.068181818181819e-06, |
|
"loss": 0.2331, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.04943357363542739, |
|
"grad_norm": 0.154296875, |
|
"learning_rate": 8.181818181818183e-06, |
|
"loss": 0.2446, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.050120151047030555, |
|
"grad_norm": 0.130859375, |
|
"learning_rate": 8.295454545454547e-06, |
|
"loss": 0.2319, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.05080672845863371, |
|
"grad_norm": 0.14453125, |
|
"learning_rate": 8.40909090909091e-06, |
|
"loss": 0.2242, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.05149330587023687, |
|
"grad_norm": 0.15234375, |
|
"learning_rate": 8.522727272727273e-06, |
|
"loss": 0.2326, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.05217988328184003, |
|
"grad_norm": 0.138671875, |
|
"learning_rate": 8.636363636363637e-06, |
|
"loss": 0.2288, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.05286646069344319, |
|
"grad_norm": 0.1337890625, |
|
"learning_rate": 8.750000000000001e-06, |
|
"loss": 0.2166, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.053553038105046344, |
|
"grad_norm": 0.1455078125, |
|
"learning_rate": 8.863636363636365e-06, |
|
"loss": 0.2296, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.0542396155166495, |
|
"grad_norm": 0.1474609375, |
|
"learning_rate": 8.977272727272727e-06, |
|
"loss": 0.214, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.05492619292825266, |
|
"grad_norm": 0.1455078125, |
|
"learning_rate": 9.090909090909091e-06, |
|
"loss": 0.2157, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.055612770339855816, |
|
"grad_norm": 0.142578125, |
|
"learning_rate": 9.204545454545455e-06, |
|
"loss": 0.2127, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.05629934775145898, |
|
"grad_norm": 0.138671875, |
|
"learning_rate": 9.318181818181819e-06, |
|
"loss": 0.2045, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.05698592516306213, |
|
"grad_norm": 0.138671875, |
|
"learning_rate": 9.431818181818183e-06, |
|
"loss": 0.1994, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.057672502574665295, |
|
"grad_norm": 0.1416015625, |
|
"learning_rate": 9.545454545454547e-06, |
|
"loss": 0.2053, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.05835907998626845, |
|
"grad_norm": 0.1357421875, |
|
"learning_rate": 9.65909090909091e-06, |
|
"loss": 0.1845, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.05904565739787161, |
|
"grad_norm": 0.130859375, |
|
"learning_rate": 9.772727272727273e-06, |
|
"loss": 0.1869, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.05973223480947477, |
|
"grad_norm": 0.1435546875, |
|
"learning_rate": 9.886363636363637e-06, |
|
"loss": 0.1928, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.06041881222107793, |
|
"grad_norm": 0.1376953125, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1915, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.061105389632681084, |
|
"grad_norm": 0.1328125, |
|
"learning_rate": 9.999998787742986e-06, |
|
"loss": 0.1911, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.061791967044284246, |
|
"grad_norm": 0.12890625, |
|
"learning_rate": 9.999995150972593e-06, |
|
"loss": 0.1823, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.0624785444558874, |
|
"grad_norm": 0.12890625, |
|
"learning_rate": 9.999989089690783e-06, |
|
"loss": 0.1729, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.06316512186749056, |
|
"grad_norm": 0.12060546875, |
|
"learning_rate": 9.999980603900821e-06, |
|
"loss": 0.1627, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.06385169927909372, |
|
"grad_norm": 0.125, |
|
"learning_rate": 9.99996969360728e-06, |
|
"loss": 0.1691, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.06453827669069688, |
|
"grad_norm": 0.123046875, |
|
"learning_rate": 9.999956358816037e-06, |
|
"loss": 0.175, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.06522485410230003, |
|
"grad_norm": 0.12451171875, |
|
"learning_rate": 9.999940599534277e-06, |
|
"loss": 0.1685, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.06591143151390319, |
|
"grad_norm": 0.11572265625, |
|
"learning_rate": 9.99992241577049e-06, |
|
"loss": 0.1599, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.06659800892550635, |
|
"grad_norm": 0.11328125, |
|
"learning_rate": 9.999901807534473e-06, |
|
"loss": 0.1658, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.06728458633710951, |
|
"grad_norm": 0.11279296875, |
|
"learning_rate": 9.999878774837331e-06, |
|
"loss": 0.1652, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.06797116374871266, |
|
"grad_norm": 0.1083984375, |
|
"learning_rate": 9.999853317691473e-06, |
|
"loss": 0.1499, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.06865774116031582, |
|
"grad_norm": 0.1015625, |
|
"learning_rate": 9.999825436110612e-06, |
|
"loss": 0.1522, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.06934431857191899, |
|
"grad_norm": 0.109375, |
|
"learning_rate": 9.999795130109777e-06, |
|
"loss": 0.1652, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.07003089598352215, |
|
"grad_norm": 0.10205078125, |
|
"learning_rate": 9.999762399705288e-06, |
|
"loss": 0.1515, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.0707174733951253, |
|
"grad_norm": 0.0947265625, |
|
"learning_rate": 9.999727244914785e-06, |
|
"loss": 0.1451, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.07140405080672846, |
|
"grad_norm": 0.095703125, |
|
"learning_rate": 9.999689665757205e-06, |
|
"loss": 0.1461, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.07209062821833162, |
|
"grad_norm": 0.08740234375, |
|
"learning_rate": 9.9996496622528e-06, |
|
"loss": 0.1393, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.07277720562993477, |
|
"grad_norm": 0.09033203125, |
|
"learning_rate": 9.999607234423118e-06, |
|
"loss": 0.1367, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.07346378304153793, |
|
"grad_norm": 0.0859375, |
|
"learning_rate": 9.99956238229102e-06, |
|
"loss": 0.1269, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.07415036045314109, |
|
"grad_norm": 0.08251953125, |
|
"learning_rate": 9.999515105880674e-06, |
|
"loss": 0.1372, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.07483693786474425, |
|
"grad_norm": 0.0859375, |
|
"learning_rate": 9.999465405217547e-06, |
|
"loss": 0.1274, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.0755235152763474, |
|
"grad_norm": 0.0830078125, |
|
"learning_rate": 9.99941328032842e-06, |
|
"loss": 0.1425, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.07621009268795056, |
|
"grad_norm": 0.08251953125, |
|
"learning_rate": 9.999358731241378e-06, |
|
"loss": 0.126, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.07689667009955373, |
|
"grad_norm": 0.0849609375, |
|
"learning_rate": 9.999301757985807e-06, |
|
"loss": 0.1378, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.07758324751115689, |
|
"grad_norm": 0.0830078125, |
|
"learning_rate": 9.999242360592406e-06, |
|
"loss": 0.1446, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.07826982492276004, |
|
"grad_norm": 0.08447265625, |
|
"learning_rate": 9.999180539093177e-06, |
|
"loss": 0.1272, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.0789564023343632, |
|
"grad_norm": 0.0830078125, |
|
"learning_rate": 9.999116293521427e-06, |
|
"loss": 0.1317, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.07964297974596636, |
|
"grad_norm": 0.076171875, |
|
"learning_rate": 9.999049623911771e-06, |
|
"loss": 0.127, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.08032955715756952, |
|
"grad_norm": 0.07275390625, |
|
"learning_rate": 9.998980530300129e-06, |
|
"loss": 0.1318, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.08101613456917267, |
|
"grad_norm": 0.07177734375, |
|
"learning_rate": 9.998909012723729e-06, |
|
"loss": 0.1315, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.08170271198077583, |
|
"grad_norm": 0.0712890625, |
|
"learning_rate": 9.9988350712211e-06, |
|
"loss": 0.1246, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.082389289392379, |
|
"grad_norm": 0.07470703125, |
|
"learning_rate": 9.998758705832084e-06, |
|
"loss": 0.1244, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.08307586680398214, |
|
"grad_norm": 0.0732421875, |
|
"learning_rate": 9.998679916597822e-06, |
|
"loss": 0.1238, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.0837624442155853, |
|
"grad_norm": 0.0654296875, |
|
"learning_rate": 9.998598703560766e-06, |
|
"loss": 0.126, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.08444902162718847, |
|
"grad_norm": 0.0673828125, |
|
"learning_rate": 9.998515066764672e-06, |
|
"loss": 0.1258, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.08513559903879163, |
|
"grad_norm": 0.0673828125, |
|
"learning_rate": 9.998429006254601e-06, |
|
"loss": 0.1252, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.08582217645039478, |
|
"grad_norm": 0.06396484375, |
|
"learning_rate": 9.99834052207692e-06, |
|
"loss": 0.1163, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.08650875386199794, |
|
"grad_norm": 0.07275390625, |
|
"learning_rate": 9.998249614279306e-06, |
|
"loss": 0.1269, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.0871953312736011, |
|
"grad_norm": 0.06640625, |
|
"learning_rate": 9.998156282910736e-06, |
|
"loss": 0.1267, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.08788190868520426, |
|
"grad_norm": 0.06787109375, |
|
"learning_rate": 9.998060528021493e-06, |
|
"loss": 0.1224, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.08856848609680741, |
|
"grad_norm": 0.06298828125, |
|
"learning_rate": 9.997962349663172e-06, |
|
"loss": 0.123, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.08925506350841057, |
|
"grad_norm": 0.061279296875, |
|
"learning_rate": 9.997861747888669e-06, |
|
"loss": 0.1259, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.08994164092001374, |
|
"grad_norm": 0.06787109375, |
|
"learning_rate": 9.997758722752182e-06, |
|
"loss": 0.1234, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.09062821833161688, |
|
"grad_norm": 0.06591796875, |
|
"learning_rate": 9.997653274309225e-06, |
|
"loss": 0.1189, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.09131479574322005, |
|
"grad_norm": 0.060791015625, |
|
"learning_rate": 9.997545402616607e-06, |
|
"loss": 0.1156, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.09200137315482321, |
|
"grad_norm": 0.062255859375, |
|
"learning_rate": 9.997435107732451e-06, |
|
"loss": 0.1192, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.09268795056642637, |
|
"grad_norm": 0.061767578125, |
|
"learning_rate": 9.997322389716178e-06, |
|
"loss": 0.1104, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.09337452797802952, |
|
"grad_norm": 0.0634765625, |
|
"learning_rate": 9.99720724862852e-06, |
|
"loss": 0.1199, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.09406110538963268, |
|
"grad_norm": 0.0625, |
|
"learning_rate": 9.997089684531514e-06, |
|
"loss": 0.1147, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.09474768280123584, |
|
"grad_norm": 0.056884765625, |
|
"learning_rate": 9.996969697488499e-06, |
|
"loss": 0.1186, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.095434260212839, |
|
"grad_norm": 0.056396484375, |
|
"learning_rate": 9.996847287564124e-06, |
|
"loss": 0.1054, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.09612083762444215, |
|
"grad_norm": 0.064453125, |
|
"learning_rate": 9.99672245482434e-06, |
|
"loss": 0.119, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.09680741503604531, |
|
"grad_norm": 0.055908203125, |
|
"learning_rate": 9.996595199336404e-06, |
|
"loss": 0.1132, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.09749399244764848, |
|
"grad_norm": 0.08251953125, |
|
"learning_rate": 9.99646552116888e-06, |
|
"loss": 0.119, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.09818056985925164, |
|
"grad_norm": 0.059326171875, |
|
"learning_rate": 9.996333420391635e-06, |
|
"loss": 0.1144, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.09886714727085479, |
|
"grad_norm": 0.07080078125, |
|
"learning_rate": 9.996198897075842e-06, |
|
"loss": 0.1195, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.09955372468245795, |
|
"grad_norm": 0.056884765625, |
|
"learning_rate": 9.996061951293981e-06, |
|
"loss": 0.1136, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.10024030209406111, |
|
"grad_norm": 0.06884765625, |
|
"learning_rate": 9.995922583119836e-06, |
|
"loss": 0.1078, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.10092687950566426, |
|
"grad_norm": 0.060791015625, |
|
"learning_rate": 9.995780792628494e-06, |
|
"loss": 0.1192, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.10161345691726742, |
|
"grad_norm": 0.0537109375, |
|
"learning_rate": 9.99563657989635e-06, |
|
"loss": 0.103, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.10230003432887058, |
|
"grad_norm": 0.064453125, |
|
"learning_rate": 9.995489945001104e-06, |
|
"loss": 0.1227, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.10298661174047374, |
|
"grad_norm": 0.057861328125, |
|
"learning_rate": 9.995340888021761e-06, |
|
"loss": 0.1059, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.10367318915207689, |
|
"grad_norm": 0.054443359375, |
|
"learning_rate": 9.995189409038626e-06, |
|
"loss": 0.1101, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.10435976656368005, |
|
"grad_norm": 0.06787109375, |
|
"learning_rate": 9.995035508133316e-06, |
|
"loss": 0.1257, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.10504634397528322, |
|
"grad_norm": 0.05712890625, |
|
"learning_rate": 9.994879185388747e-06, |
|
"loss": 0.1123, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.10573292138688638, |
|
"grad_norm": 0.055908203125, |
|
"learning_rate": 9.994720440889147e-06, |
|
"loss": 0.1118, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.10641949879848953, |
|
"grad_norm": 0.057373046875, |
|
"learning_rate": 9.994559274720043e-06, |
|
"loss": 0.1161, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.10710607621009269, |
|
"grad_norm": 0.05419921875, |
|
"learning_rate": 9.994395686968267e-06, |
|
"loss": 0.1159, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.10779265362169585, |
|
"grad_norm": 0.0654296875, |
|
"learning_rate": 9.994229677721957e-06, |
|
"loss": 0.1072, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.108479231033299, |
|
"grad_norm": 0.06640625, |
|
"learning_rate": 9.994061247070557e-06, |
|
"loss": 0.1131, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.10916580844490216, |
|
"grad_norm": 0.057861328125, |
|
"learning_rate": 9.993890395104812e-06, |
|
"loss": 0.1125, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.10985238585650532, |
|
"grad_norm": 0.050537109375, |
|
"learning_rate": 9.993717121916778e-06, |
|
"loss": 0.108, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.11053896326810848, |
|
"grad_norm": 0.060546875, |
|
"learning_rate": 9.993541427599805e-06, |
|
"loss": 0.1077, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.11122554067971163, |
|
"grad_norm": 0.058837890625, |
|
"learning_rate": 9.993363312248559e-06, |
|
"loss": 0.1006, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.1119121180913148, |
|
"grad_norm": 0.056396484375, |
|
"learning_rate": 9.993182775959004e-06, |
|
"loss": 0.1138, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.11259869550291796, |
|
"grad_norm": 0.056640625, |
|
"learning_rate": 9.992999818828408e-06, |
|
"loss": 0.1093, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.11328527291452112, |
|
"grad_norm": 0.059814453125, |
|
"learning_rate": 9.992814440955346e-06, |
|
"loss": 0.1042, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.11397185032612427, |
|
"grad_norm": 0.05419921875, |
|
"learning_rate": 9.992626642439695e-06, |
|
"loss": 0.1077, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.11465842773772743, |
|
"grad_norm": 0.052490234375, |
|
"learning_rate": 9.992436423382639e-06, |
|
"loss": 0.1057, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.11534500514933059, |
|
"grad_norm": 0.055419921875, |
|
"learning_rate": 9.992243783886663e-06, |
|
"loss": 0.1147, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.11603158256093375, |
|
"grad_norm": 0.052001953125, |
|
"learning_rate": 9.992048724055559e-06, |
|
"loss": 0.1167, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.1167181599725369, |
|
"grad_norm": 0.053466796875, |
|
"learning_rate": 9.99185124399442e-06, |
|
"loss": 0.1223, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.11740473738414006, |
|
"grad_norm": 0.0546875, |
|
"learning_rate": 9.991651343809642e-06, |
|
"loss": 0.1134, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.11809131479574322, |
|
"grad_norm": 0.0546875, |
|
"learning_rate": 9.991449023608932e-06, |
|
"loss": 0.1033, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.11877789220734637, |
|
"grad_norm": 0.05126953125, |
|
"learning_rate": 9.991244283501294e-06, |
|
"loss": 0.1045, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.11946446961894953, |
|
"grad_norm": 0.049072265625, |
|
"learning_rate": 9.991037123597041e-06, |
|
"loss": 0.1018, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.1201510470305527, |
|
"grad_norm": 0.055419921875, |
|
"learning_rate": 9.990827544007783e-06, |
|
"loss": 0.1089, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.12083762444215586, |
|
"grad_norm": 0.05322265625, |
|
"learning_rate": 9.990615544846439e-06, |
|
"loss": 0.1114, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.121524201853759, |
|
"grad_norm": 0.051513671875, |
|
"learning_rate": 9.99040112622723e-06, |
|
"loss": 0.1115, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.12221077926536217, |
|
"grad_norm": 0.056396484375, |
|
"learning_rate": 9.990184288265679e-06, |
|
"loss": 0.1022, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.12289735667696533, |
|
"grad_norm": 0.056884765625, |
|
"learning_rate": 9.989965031078616e-06, |
|
"loss": 0.0975, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.12358393408856849, |
|
"grad_norm": 0.046630859375, |
|
"learning_rate": 9.989743354784174e-06, |
|
"loss": 0.0964, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.12427051150017164, |
|
"grad_norm": 0.05859375, |
|
"learning_rate": 9.989519259501786e-06, |
|
"loss": 0.1064, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.1249570889117748, |
|
"grad_norm": 0.0537109375, |
|
"learning_rate": 9.989292745352191e-06, |
|
"loss": 0.0997, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.12564366632337795, |
|
"grad_norm": 0.054443359375, |
|
"learning_rate": 9.98906381245743e-06, |
|
"loss": 0.1122, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.1263302437349811, |
|
"grad_norm": 0.053955078125, |
|
"learning_rate": 9.988832460940846e-06, |
|
"loss": 0.0986, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.12701682114658427, |
|
"grad_norm": 0.05322265625, |
|
"learning_rate": 9.98859869092709e-06, |
|
"loss": 0.1093, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.12770339855818744, |
|
"grad_norm": 0.0498046875, |
|
"learning_rate": 9.98836250254211e-06, |
|
"loss": 0.0981, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.1283899759697906, |
|
"grad_norm": 0.051025390625, |
|
"learning_rate": 9.988123895913162e-06, |
|
"loss": 0.1165, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.12907655338139376, |
|
"grad_norm": 0.052978515625, |
|
"learning_rate": 9.987882871168801e-06, |
|
"loss": 0.1141, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.12976313079299692, |
|
"grad_norm": 0.0458984375, |
|
"learning_rate": 9.987639428438888e-06, |
|
"loss": 0.1002, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.13044970820460006, |
|
"grad_norm": 0.05029296875, |
|
"learning_rate": 9.987393567854585e-06, |
|
"loss": 0.0978, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.13113628561620322, |
|
"grad_norm": 0.05419921875, |
|
"learning_rate": 9.987145289548356e-06, |
|
"loss": 0.0973, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.13182286302780638, |
|
"grad_norm": 0.053466796875, |
|
"learning_rate": 9.986894593653969e-06, |
|
"loss": 0.1088, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.13250944043940954, |
|
"grad_norm": 0.0478515625, |
|
"learning_rate": 9.986641480306495e-06, |
|
"loss": 0.1175, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.1331960178510127, |
|
"grad_norm": 0.0498046875, |
|
"learning_rate": 9.986385949642307e-06, |
|
"loss": 0.1183, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.13388259526261587, |
|
"grad_norm": 0.058837890625, |
|
"learning_rate": 9.986128001799077e-06, |
|
"loss": 0.1023, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.13456917267421903, |
|
"grad_norm": 0.05029296875, |
|
"learning_rate": 9.985867636915784e-06, |
|
"loss": 0.1152, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.13525575008582216, |
|
"grad_norm": 0.05029296875, |
|
"learning_rate": 9.98560485513271e-06, |
|
"loss": 0.1187, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.13594232749742532, |
|
"grad_norm": 0.05029296875, |
|
"learning_rate": 9.985339656591434e-06, |
|
"loss": 0.1111, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.1366289049090285, |
|
"grad_norm": 0.049560546875, |
|
"learning_rate": 9.985072041434841e-06, |
|
"loss": 0.1104, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.13731548232063165, |
|
"grad_norm": 0.045166015625, |
|
"learning_rate": 9.984802009807117e-06, |
|
"loss": 0.1009, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.1380020597322348, |
|
"grad_norm": 0.048583984375, |
|
"learning_rate": 9.984529561853749e-06, |
|
"loss": 0.0999, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.13868863714383797, |
|
"grad_norm": 0.049560546875, |
|
"learning_rate": 9.984254697721528e-06, |
|
"loss": 0.1025, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.13937521455544113, |
|
"grad_norm": 0.052490234375, |
|
"learning_rate": 9.983977417558544e-06, |
|
"loss": 0.1009, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.1400617919670443, |
|
"grad_norm": 0.0498046875, |
|
"learning_rate": 9.98369772151419e-06, |
|
"loss": 0.1008, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.14074836937864743, |
|
"grad_norm": 0.059326171875, |
|
"learning_rate": 9.983415609739165e-06, |
|
"loss": 0.107, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.1414349467902506, |
|
"grad_norm": 0.0576171875, |
|
"learning_rate": 9.98313108238546e-06, |
|
"loss": 0.1196, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.14212152420185376, |
|
"grad_norm": 0.053955078125, |
|
"learning_rate": 9.982844139606375e-06, |
|
"loss": 0.1, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.14280810161345692, |
|
"grad_norm": 0.059814453125, |
|
"learning_rate": 9.982554781556512e-06, |
|
"loss": 0.1047, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.14349467902506008, |
|
"grad_norm": 0.04638671875, |
|
"learning_rate": 9.982263008391769e-06, |
|
"loss": 0.1052, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.14418125643666324, |
|
"grad_norm": 0.051025390625, |
|
"learning_rate": 9.981968820269347e-06, |
|
"loss": 0.0929, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.1448678338482664, |
|
"grad_norm": 0.056640625, |
|
"learning_rate": 9.98167221734775e-06, |
|
"loss": 0.1063, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.14555441125986954, |
|
"grad_norm": 0.0556640625, |
|
"learning_rate": 9.981373199786782e-06, |
|
"loss": 0.0981, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.1462409886714727, |
|
"grad_norm": 0.056396484375, |
|
"learning_rate": 9.981071767747547e-06, |
|
"loss": 0.0982, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.14692756608307586, |
|
"grad_norm": 0.052490234375, |
|
"learning_rate": 9.980767921392453e-06, |
|
"loss": 0.1059, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.14761414349467902, |
|
"grad_norm": 0.0546875, |
|
"learning_rate": 9.980461660885204e-06, |
|
"loss": 0.1098, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.14830072090628219, |
|
"grad_norm": 0.048583984375, |
|
"learning_rate": 9.98015298639081e-06, |
|
"loss": 0.1041, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.14898729831788535, |
|
"grad_norm": 0.053955078125, |
|
"learning_rate": 9.979841898075577e-06, |
|
"loss": 0.097, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.1496738757294885, |
|
"grad_norm": 0.052001953125, |
|
"learning_rate": 9.979528396107114e-06, |
|
"loss": 0.1045, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.15036045314109167, |
|
"grad_norm": 0.057373046875, |
|
"learning_rate": 9.97921248065433e-06, |
|
"loss": 0.1028, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.1510470305526948, |
|
"grad_norm": 0.052490234375, |
|
"learning_rate": 9.978894151887435e-06, |
|
"loss": 0.1006, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.15173360796429797, |
|
"grad_norm": 0.051025390625, |
|
"learning_rate": 9.978573409977937e-06, |
|
"loss": 0.1098, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.15242018537590113, |
|
"grad_norm": 0.052734375, |
|
"learning_rate": 9.978250255098645e-06, |
|
"loss": 0.1036, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.1531067627875043, |
|
"grad_norm": 0.048095703125, |
|
"learning_rate": 9.977924687423672e-06, |
|
"loss": 0.0981, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.15379334019910745, |
|
"grad_norm": 0.05029296875, |
|
"learning_rate": 9.977596707128424e-06, |
|
"loss": 0.0971, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.15447991761071062, |
|
"grad_norm": 0.054443359375, |
|
"learning_rate": 9.977266314389611e-06, |
|
"loss": 0.0996, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.15516649502231378, |
|
"grad_norm": 0.060546875, |
|
"learning_rate": 9.976933509385245e-06, |
|
"loss": 0.1187, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.1558530724339169, |
|
"grad_norm": 0.052978515625, |
|
"learning_rate": 9.976598292294632e-06, |
|
"loss": 0.1016, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.15653964984552007, |
|
"grad_norm": 0.049560546875, |
|
"learning_rate": 9.976260663298384e-06, |
|
"loss": 0.0957, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.15722622725712324, |
|
"grad_norm": 0.060791015625, |
|
"learning_rate": 9.975920622578403e-06, |
|
"loss": 0.0985, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.1579128046687264, |
|
"grad_norm": 0.050048828125, |
|
"learning_rate": 9.975578170317905e-06, |
|
"loss": 0.1004, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.15859938208032956, |
|
"grad_norm": 0.047607421875, |
|
"learning_rate": 9.97523330670139e-06, |
|
"loss": 0.1095, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.15928595949193272, |
|
"grad_norm": 0.053955078125, |
|
"learning_rate": 9.974886031914665e-06, |
|
"loss": 0.1072, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.15997253690353588, |
|
"grad_norm": 0.07275390625, |
|
"learning_rate": 9.974536346144838e-06, |
|
"loss": 0.1003, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.16065911431513905, |
|
"grad_norm": 0.05126953125, |
|
"learning_rate": 9.974184249580309e-06, |
|
"loss": 0.1118, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.16134569172674218, |
|
"grad_norm": 0.0517578125, |
|
"learning_rate": 9.973829742410784e-06, |
|
"loss": 0.1, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.16203226913834534, |
|
"grad_norm": 0.056396484375, |
|
"learning_rate": 9.973472824827262e-06, |
|
"loss": 0.103, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.1627188465499485, |
|
"grad_norm": 0.045166015625, |
|
"learning_rate": 9.973113497022047e-06, |
|
"loss": 0.098, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.16340542396155167, |
|
"grad_norm": 0.045166015625, |
|
"learning_rate": 9.972751759188736e-06, |
|
"loss": 0.1063, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.16409200137315483, |
|
"grad_norm": 0.04931640625, |
|
"learning_rate": 9.972387611522227e-06, |
|
"loss": 0.1141, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.164778578784758, |
|
"grad_norm": 0.04833984375, |
|
"learning_rate": 9.972021054218712e-06, |
|
"loss": 0.1058, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.16546515619636115, |
|
"grad_norm": 0.052734375, |
|
"learning_rate": 9.971652087475691e-06, |
|
"loss": 0.1034, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.16615173360796429, |
|
"grad_norm": 0.056884765625, |
|
"learning_rate": 9.971280711491952e-06, |
|
"loss": 0.1071, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.16683831101956745, |
|
"grad_norm": 0.05419921875, |
|
"learning_rate": 9.970906926467588e-06, |
|
"loss": 0.1097, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.1675248884311706, |
|
"grad_norm": 0.0498046875, |
|
"learning_rate": 9.970530732603984e-06, |
|
"loss": 0.1005, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.16821146584277377, |
|
"grad_norm": 0.04833984375, |
|
"learning_rate": 9.97015213010383e-06, |
|
"loss": 0.1067, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.16889804325437693, |
|
"grad_norm": 0.051025390625, |
|
"learning_rate": 9.969771119171108e-06, |
|
"loss": 0.0961, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.1695846206659801, |
|
"grad_norm": 0.053466796875, |
|
"learning_rate": 9.969387700011098e-06, |
|
"loss": 0.0966, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.17027119807758326, |
|
"grad_norm": 0.059814453125, |
|
"learning_rate": 9.969001872830383e-06, |
|
"loss": 0.1192, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.1709577754891864, |
|
"grad_norm": 0.06298828125, |
|
"learning_rate": 9.968613637836833e-06, |
|
"loss": 0.1069, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.17164435290078955, |
|
"grad_norm": 0.05517578125, |
|
"learning_rate": 9.968222995239628e-06, |
|
"loss": 0.1083, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.17233093031239272, |
|
"grad_norm": 0.052001953125, |
|
"learning_rate": 9.967829945249234e-06, |
|
"loss": 0.0978, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.17301750772399588, |
|
"grad_norm": 0.048828125, |
|
"learning_rate": 9.967434488077422e-06, |
|
"loss": 0.1058, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.17370408513559904, |
|
"grad_norm": 0.05029296875, |
|
"learning_rate": 9.967036623937252e-06, |
|
"loss": 0.0946, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.1743906625472022, |
|
"grad_norm": 0.0537109375, |
|
"learning_rate": 9.966636353043092e-06, |
|
"loss": 0.095, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.17507723995880536, |
|
"grad_norm": 0.047119140625, |
|
"learning_rate": 9.966233675610599e-06, |
|
"loss": 0.1071, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.17576381737040853, |
|
"grad_norm": 0.055908203125, |
|
"learning_rate": 9.965828591856725e-06, |
|
"loss": 0.0944, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.17645039478201166, |
|
"grad_norm": 0.048828125, |
|
"learning_rate": 9.965421101999721e-06, |
|
"loss": 0.1117, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.17713697219361482, |
|
"grad_norm": 0.0478515625, |
|
"learning_rate": 9.965011206259138e-06, |
|
"loss": 0.1015, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.17782354960521798, |
|
"grad_norm": 0.052490234375, |
|
"learning_rate": 9.964598904855818e-06, |
|
"loss": 0.1103, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.17851012701682115, |
|
"grad_norm": 0.04931640625, |
|
"learning_rate": 9.964184198011903e-06, |
|
"loss": 0.11, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.1791967044284243, |
|
"grad_norm": 0.052734375, |
|
"learning_rate": 9.963767085950824e-06, |
|
"loss": 0.1093, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.17988328184002747, |
|
"grad_norm": 0.043701171875, |
|
"learning_rate": 9.96334756889732e-06, |
|
"loss": 0.1038, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.18056985925163063, |
|
"grad_norm": 0.050537109375, |
|
"learning_rate": 9.962925647077414e-06, |
|
"loss": 0.1021, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.18125643666323377, |
|
"grad_norm": 0.050537109375, |
|
"learning_rate": 9.962501320718432e-06, |
|
"loss": 0.0979, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.18194301407483693, |
|
"grad_norm": 0.050048828125, |
|
"learning_rate": 9.96207459004899e-06, |
|
"loss": 0.1059, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.1826295914864401, |
|
"grad_norm": 0.051513671875, |
|
"learning_rate": 9.961645455299006e-06, |
|
"loss": 0.107, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.18331616889804325, |
|
"grad_norm": 0.044189453125, |
|
"learning_rate": 9.961213916699685e-06, |
|
"loss": 0.1066, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.18400274630964641, |
|
"grad_norm": 0.04736328125, |
|
"learning_rate": 9.960779974483537e-06, |
|
"loss": 0.1127, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.18468932372124958, |
|
"grad_norm": 0.04833984375, |
|
"learning_rate": 9.96034362888436e-06, |
|
"loss": 0.1034, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.18537590113285274, |
|
"grad_norm": 0.05517578125, |
|
"learning_rate": 9.959904880137246e-06, |
|
"loss": 0.121, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.1860624785444559, |
|
"grad_norm": 0.0478515625, |
|
"learning_rate": 9.959463728478586e-06, |
|
"loss": 0.1028, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.18674905595605903, |
|
"grad_norm": 0.049560546875, |
|
"learning_rate": 9.959020174146066e-06, |
|
"loss": 0.1042, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.1874356333676622, |
|
"grad_norm": 0.06201171875, |
|
"learning_rate": 9.958574217378663e-06, |
|
"loss": 0.1, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.18812221077926536, |
|
"grad_norm": 0.0517578125, |
|
"learning_rate": 9.95812585841665e-06, |
|
"loss": 0.095, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.18880878819086852, |
|
"grad_norm": 0.0517578125, |
|
"learning_rate": 9.957675097501594e-06, |
|
"loss": 0.1086, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.18949536560247168, |
|
"grad_norm": 0.053955078125, |
|
"learning_rate": 9.957221934876355e-06, |
|
"loss": 0.1024, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.19018194301407484, |
|
"grad_norm": 0.049560546875, |
|
"learning_rate": 9.956766370785093e-06, |
|
"loss": 0.0951, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.190868520425678, |
|
"grad_norm": 0.048583984375, |
|
"learning_rate": 9.956308405473252e-06, |
|
"loss": 0.1097, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.19155509783728114, |
|
"grad_norm": 0.0478515625, |
|
"learning_rate": 9.95584803918758e-06, |
|
"loss": 0.1037, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.1922416752488843, |
|
"grad_norm": 0.04638671875, |
|
"learning_rate": 9.955385272176108e-06, |
|
"loss": 0.1065, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.19292825266048746, |
|
"grad_norm": 0.052490234375, |
|
"learning_rate": 9.95492010468817e-06, |
|
"loss": 0.0983, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.19361483007209063, |
|
"grad_norm": 0.05029296875, |
|
"learning_rate": 9.954452536974387e-06, |
|
"loss": 0.1056, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.1943014074836938, |
|
"grad_norm": 0.047607421875, |
|
"learning_rate": 9.953982569286679e-06, |
|
"loss": 0.0873, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.19498798489529695, |
|
"grad_norm": 0.05322265625, |
|
"learning_rate": 9.953510201878251e-06, |
|
"loss": 0.1029, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.1956745623069001, |
|
"grad_norm": 0.04541015625, |
|
"learning_rate": 9.953035435003608e-06, |
|
"loss": 0.0978, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.19636113971850327, |
|
"grad_norm": 0.0556640625, |
|
"learning_rate": 9.952558268918546e-06, |
|
"loss": 0.1034, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.1970477171301064, |
|
"grad_norm": 0.050048828125, |
|
"learning_rate": 9.952078703880153e-06, |
|
"loss": 0.0946, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.19773429454170957, |
|
"grad_norm": 0.05224609375, |
|
"learning_rate": 9.951596740146809e-06, |
|
"loss": 0.1014, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.19842087195331273, |
|
"grad_norm": 0.055419921875, |
|
"learning_rate": 9.951112377978185e-06, |
|
"loss": 0.0935, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.1991074493649159, |
|
"grad_norm": 0.0498046875, |
|
"learning_rate": 9.950625617635247e-06, |
|
"loss": 0.1053, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.19979402677651906, |
|
"grad_norm": 0.056396484375, |
|
"learning_rate": 9.950136459380253e-06, |
|
"loss": 0.1005, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.20048060418812222, |
|
"grad_norm": 0.05419921875, |
|
"learning_rate": 9.949644903476752e-06, |
|
"loss": 0.1031, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.20116718159972538, |
|
"grad_norm": 0.061279296875, |
|
"learning_rate": 9.949150950189586e-06, |
|
"loss": 0.0987, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.20185375901132852, |
|
"grad_norm": 0.06005859375, |
|
"learning_rate": 9.948654599784886e-06, |
|
"loss": 0.1033, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.20254033642293168, |
|
"grad_norm": 0.05419921875, |
|
"learning_rate": 9.948155852530075e-06, |
|
"loss": 0.0913, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.20322691383453484, |
|
"grad_norm": 0.052734375, |
|
"learning_rate": 9.947654708693872e-06, |
|
"loss": 0.1012, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.203913491246138, |
|
"grad_norm": 0.050048828125, |
|
"learning_rate": 9.947151168546281e-06, |
|
"loss": 0.1038, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.20460006865774116, |
|
"grad_norm": 0.0478515625, |
|
"learning_rate": 9.946645232358602e-06, |
|
"loss": 0.0978, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.20528664606934433, |
|
"grad_norm": 0.056396484375, |
|
"learning_rate": 9.94613690040342e-06, |
|
"loss": 0.1099, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.2059732234809475, |
|
"grad_norm": 0.04931640625, |
|
"learning_rate": 9.945626172954617e-06, |
|
"loss": 0.1112, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.20665980089255062, |
|
"grad_norm": 0.05029296875, |
|
"learning_rate": 9.945113050287363e-06, |
|
"loss": 0.0974, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.20734637830415378, |
|
"grad_norm": 0.060546875, |
|
"learning_rate": 9.94459753267812e-06, |
|
"loss": 0.0924, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.20803295571575695, |
|
"grad_norm": 0.057373046875, |
|
"learning_rate": 9.944079620404638e-06, |
|
"loss": 0.0959, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.2087195331273601, |
|
"grad_norm": 0.045654296875, |
|
"learning_rate": 9.943559313745957e-06, |
|
"loss": 0.1024, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.20940611053896327, |
|
"grad_norm": 0.046142578125, |
|
"learning_rate": 9.943036612982409e-06, |
|
"loss": 0.1018, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.21009268795056643, |
|
"grad_norm": 0.046875, |
|
"learning_rate": 9.942511518395616e-06, |
|
"loss": 0.1018, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.2107792653621696, |
|
"grad_norm": 0.05517578125, |
|
"learning_rate": 9.941984030268487e-06, |
|
"loss": 0.0995, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.21146584277377276, |
|
"grad_norm": 0.05712890625, |
|
"learning_rate": 9.941454148885226e-06, |
|
"loss": 0.1024, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.2121524201853759, |
|
"grad_norm": 0.052001953125, |
|
"learning_rate": 9.940921874531322e-06, |
|
"loss": 0.0983, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.21283899759697905, |
|
"grad_norm": 0.053955078125, |
|
"learning_rate": 9.94038720749355e-06, |
|
"loss": 0.1033, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.2135255750085822, |
|
"grad_norm": 0.0576171875, |
|
"learning_rate": 9.939850148059983e-06, |
|
"loss": 0.0983, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.21421215242018538, |
|
"grad_norm": 0.046875, |
|
"learning_rate": 9.939310696519977e-06, |
|
"loss": 0.098, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.21489872983178854, |
|
"grad_norm": 0.060546875, |
|
"learning_rate": 9.938768853164176e-06, |
|
"loss": 0.1087, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.2155853072433917, |
|
"grad_norm": 0.05908203125, |
|
"learning_rate": 9.93822461828452e-06, |
|
"loss": 0.1025, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.21627188465499486, |
|
"grad_norm": 0.044677734375, |
|
"learning_rate": 9.937677992174228e-06, |
|
"loss": 0.097, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.216958462066598, |
|
"grad_norm": 0.051513671875, |
|
"learning_rate": 9.937128975127814e-06, |
|
"loss": 0.1113, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.21764503947820116, |
|
"grad_norm": 0.05126953125, |
|
"learning_rate": 9.936577567441074e-06, |
|
"loss": 0.1005, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.21833161688980432, |
|
"grad_norm": 0.05029296875, |
|
"learning_rate": 9.936023769411103e-06, |
|
"loss": 0.0923, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.21901819430140748, |
|
"grad_norm": 0.04736328125, |
|
"learning_rate": 9.935467581336269e-06, |
|
"loss": 0.1031, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.21970477171301064, |
|
"grad_norm": 0.052978515625, |
|
"learning_rate": 9.93490900351624e-06, |
|
"loss": 0.09, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.2203913491246138, |
|
"grad_norm": 0.052490234375, |
|
"learning_rate": 9.934348036251969e-06, |
|
"loss": 0.0996, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.22107792653621697, |
|
"grad_norm": 0.052734375, |
|
"learning_rate": 9.933784679845687e-06, |
|
"loss": 0.1056, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.22176450394782013, |
|
"grad_norm": 0.05224609375, |
|
"learning_rate": 9.933218934600927e-06, |
|
"loss": 0.1074, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.22245108135942326, |
|
"grad_norm": 0.05322265625, |
|
"learning_rate": 9.9326508008225e-06, |
|
"loss": 0.109, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.22313765877102643, |
|
"grad_norm": 0.0498046875, |
|
"learning_rate": 9.932080278816503e-06, |
|
"loss": 0.0926, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.2238242361826296, |
|
"grad_norm": 0.053466796875, |
|
"learning_rate": 9.931507368890323e-06, |
|
"loss": 0.0987, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.22451081359423275, |
|
"grad_norm": 0.051025390625, |
|
"learning_rate": 9.930932071352635e-06, |
|
"loss": 0.104, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.2251973910058359, |
|
"grad_norm": 0.059814453125, |
|
"learning_rate": 9.930354386513399e-06, |
|
"loss": 0.1022, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.22588396841743907, |
|
"grad_norm": 0.047119140625, |
|
"learning_rate": 9.929774314683856e-06, |
|
"loss": 0.099, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.22657054582904224, |
|
"grad_norm": 0.0517578125, |
|
"learning_rate": 9.929191856176543e-06, |
|
"loss": 0.0948, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.22725712324064537, |
|
"grad_norm": 0.0498046875, |
|
"learning_rate": 9.928607011305273e-06, |
|
"loss": 0.1033, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.22794370065224853, |
|
"grad_norm": 0.049072265625, |
|
"learning_rate": 9.928019780385152e-06, |
|
"loss": 0.1096, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.2286302780638517, |
|
"grad_norm": 0.05224609375, |
|
"learning_rate": 9.927430163732566e-06, |
|
"loss": 0.0993, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.22931685547545486, |
|
"grad_norm": 0.050048828125, |
|
"learning_rate": 9.926838161665195e-06, |
|
"loss": 0.0969, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.23000343288705802, |
|
"grad_norm": 0.05322265625, |
|
"learning_rate": 9.926243774501993e-06, |
|
"loss": 0.1008, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.23069001029866118, |
|
"grad_norm": 0.0478515625, |
|
"learning_rate": 9.925647002563205e-06, |
|
"loss": 0.0915, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.23137658771026434, |
|
"grad_norm": 0.0537109375, |
|
"learning_rate": 9.92504784617036e-06, |
|
"loss": 0.1082, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.2320631651218675, |
|
"grad_norm": 0.051025390625, |
|
"learning_rate": 9.924446305646278e-06, |
|
"loss": 0.101, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.23274974253347064, |
|
"grad_norm": 0.083984375, |
|
"learning_rate": 9.923842381315049e-06, |
|
"loss": 0.1018, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.2334363199450738, |
|
"grad_norm": 0.049072265625, |
|
"learning_rate": 9.92323607350206e-06, |
|
"loss": 0.0961, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.23412289735667696, |
|
"grad_norm": 0.057373046875, |
|
"learning_rate": 9.92262738253398e-06, |
|
"loss": 0.1025, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.23480947476828012, |
|
"grad_norm": 0.048828125, |
|
"learning_rate": 9.922016308738757e-06, |
|
"loss": 0.0994, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.2354960521798833, |
|
"grad_norm": 0.049072265625, |
|
"learning_rate": 9.921402852445627e-06, |
|
"loss": 0.0979, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.23618262959148645, |
|
"grad_norm": 0.04833984375, |
|
"learning_rate": 9.920787013985106e-06, |
|
"loss": 0.0862, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.2368692070030896, |
|
"grad_norm": 0.049560546875, |
|
"learning_rate": 9.920168793689e-06, |
|
"loss": 0.1006, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.23755578441469274, |
|
"grad_norm": 0.050537109375, |
|
"learning_rate": 9.919548191890395e-06, |
|
"loss": 0.0911, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.2382423618262959, |
|
"grad_norm": 0.045166015625, |
|
"learning_rate": 9.918925208923654e-06, |
|
"loss": 0.1028, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.23892893923789907, |
|
"grad_norm": 0.04931640625, |
|
"learning_rate": 9.918299845124433e-06, |
|
"loss": 0.1066, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.23961551664950223, |
|
"grad_norm": 0.052734375, |
|
"learning_rate": 9.917672100829664e-06, |
|
"loss": 0.0895, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.2403020940611054, |
|
"grad_norm": 0.048095703125, |
|
"learning_rate": 9.917041976377564e-06, |
|
"loss": 0.1003, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.24098867147270855, |
|
"grad_norm": 0.0537109375, |
|
"learning_rate": 9.916409472107632e-06, |
|
"loss": 0.1059, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.24167524888431172, |
|
"grad_norm": 0.050537109375, |
|
"learning_rate": 9.915774588360649e-06, |
|
"loss": 0.0993, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.24236182629591485, |
|
"grad_norm": 0.055908203125, |
|
"learning_rate": 9.915137325478677e-06, |
|
"loss": 0.1147, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.243048403707518, |
|
"grad_norm": 0.048095703125, |
|
"learning_rate": 9.914497683805065e-06, |
|
"loss": 0.1039, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.24373498111912117, |
|
"grad_norm": 0.048583984375, |
|
"learning_rate": 9.913855663684438e-06, |
|
"loss": 0.1015, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.24442155853072434, |
|
"grad_norm": 0.05029296875, |
|
"learning_rate": 9.9132112654627e-06, |
|
"loss": 0.096, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.2451081359423275, |
|
"grad_norm": 0.050048828125, |
|
"learning_rate": 9.912564489487047e-06, |
|
"loss": 0.1075, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.24579471335393066, |
|
"grad_norm": 0.05029296875, |
|
"learning_rate": 9.911915336105943e-06, |
|
"loss": 0.1002, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.24648129076553382, |
|
"grad_norm": 0.0546875, |
|
"learning_rate": 9.911263805669147e-06, |
|
"loss": 0.1126, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.24716786817713698, |
|
"grad_norm": 0.05419921875, |
|
"learning_rate": 9.910609898527686e-06, |
|
"loss": 0.0963, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.24785444558874012, |
|
"grad_norm": 0.05908203125, |
|
"learning_rate": 9.909953615033872e-06, |
|
"loss": 0.1024, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.24854102300034328, |
|
"grad_norm": 0.048095703125, |
|
"learning_rate": 9.9092949555413e-06, |
|
"loss": 0.1034, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.24922760041194644, |
|
"grad_norm": 0.0546875, |
|
"learning_rate": 9.908633920404844e-06, |
|
"loss": 0.0964, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.2499141778235496, |
|
"grad_norm": 0.049560546875, |
|
"learning_rate": 9.907970509980657e-06, |
|
"loss": 0.0991, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.25060075523515274, |
|
"grad_norm": 0.04931640625, |
|
"learning_rate": 9.90730472462617e-06, |
|
"loss": 0.0935, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.2512873326467559, |
|
"grad_norm": 0.05224609375, |
|
"learning_rate": 9.906636564700096e-06, |
|
"loss": 0.0963, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.25197391005835906, |
|
"grad_norm": 0.059326171875, |
|
"learning_rate": 9.905966030562426e-06, |
|
"loss": 0.1065, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.2526604874699622, |
|
"grad_norm": 0.05078125, |
|
"learning_rate": 9.905293122574433e-06, |
|
"loss": 0.0968, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.2533470648815654, |
|
"grad_norm": 0.057373046875, |
|
"learning_rate": 9.904617841098666e-06, |
|
"loss": 0.0966, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.25403364229316855, |
|
"grad_norm": 0.06396484375, |
|
"learning_rate": 9.903940186498953e-06, |
|
"loss": 0.1047, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.2547202197047717, |
|
"grad_norm": 0.05078125, |
|
"learning_rate": 9.903260159140404e-06, |
|
"loss": 0.0971, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.2554067971163749, |
|
"grad_norm": 0.06689453125, |
|
"learning_rate": 9.902577759389402e-06, |
|
"loss": 0.1025, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.25609337452797803, |
|
"grad_norm": 0.052001953125, |
|
"learning_rate": 9.901892987613612e-06, |
|
"loss": 0.1088, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.2567799519395812, |
|
"grad_norm": 0.047119140625, |
|
"learning_rate": 9.901205844181976e-06, |
|
"loss": 0.091, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.25746652935118436, |
|
"grad_norm": 0.055419921875, |
|
"learning_rate": 9.900516329464713e-06, |
|
"loss": 0.1, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.2581531067627875, |
|
"grad_norm": 0.052490234375, |
|
"learning_rate": 9.89982444383332e-06, |
|
"loss": 0.1043, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.2588396841743907, |
|
"grad_norm": 0.04833984375, |
|
"learning_rate": 9.899130187660573e-06, |
|
"loss": 0.1011, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.25952626158599384, |
|
"grad_norm": 0.052490234375, |
|
"learning_rate": 9.898433561320525e-06, |
|
"loss": 0.0956, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.26021283899759695, |
|
"grad_norm": 0.056884765625, |
|
"learning_rate": 9.897734565188504e-06, |
|
"loss": 0.1147, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.2608994164092001, |
|
"grad_norm": 0.051025390625, |
|
"learning_rate": 9.897033199641114e-06, |
|
"loss": 0.0911, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.2615859938208033, |
|
"grad_norm": 0.053955078125, |
|
"learning_rate": 9.896329465056238e-06, |
|
"loss": 0.1079, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.26227257123240644, |
|
"grad_norm": 0.05029296875, |
|
"learning_rate": 9.895623361813036e-06, |
|
"loss": 0.0945, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.2629591486440096, |
|
"grad_norm": 0.05224609375, |
|
"learning_rate": 9.894914890291944e-06, |
|
"loss": 0.0953, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.26364572605561276, |
|
"grad_norm": 0.047119140625, |
|
"learning_rate": 9.89420405087467e-06, |
|
"loss": 0.1101, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.2643323034672159, |
|
"grad_norm": 0.05126953125, |
|
"learning_rate": 9.893490843944201e-06, |
|
"loss": 0.0926, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.2650188808788191, |
|
"grad_norm": 0.0546875, |
|
"learning_rate": 9.892775269884802e-06, |
|
"loss": 0.0907, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.26570545829042225, |
|
"grad_norm": 0.05078125, |
|
"learning_rate": 9.892057329082009e-06, |
|
"loss": 0.0903, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.2663920357020254, |
|
"grad_norm": 0.05517578125, |
|
"learning_rate": 9.891337021922633e-06, |
|
"loss": 0.0953, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.26707861311362857, |
|
"grad_norm": 0.056396484375, |
|
"learning_rate": 9.890614348794764e-06, |
|
"loss": 0.0968, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.26776519052523173, |
|
"grad_norm": 0.0458984375, |
|
"learning_rate": 9.889889310087766e-06, |
|
"loss": 0.1017, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.2684517679368349, |
|
"grad_norm": 0.0478515625, |
|
"learning_rate": 9.889161906192271e-06, |
|
"loss": 0.0925, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.26913834534843806, |
|
"grad_norm": 0.052734375, |
|
"learning_rate": 9.888432137500194e-06, |
|
"loss": 0.1004, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.2698249227600412, |
|
"grad_norm": 0.05126953125, |
|
"learning_rate": 9.88770000440472e-06, |
|
"loss": 0.0939, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.2705115001716443, |
|
"grad_norm": 0.05322265625, |
|
"learning_rate": 9.886965507300309e-06, |
|
"loss": 0.093, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.2711980775832475, |
|
"grad_norm": 0.0556640625, |
|
"learning_rate": 9.886228646582694e-06, |
|
"loss": 0.102, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.27188465499485065, |
|
"grad_norm": 0.0546875, |
|
"learning_rate": 9.885489422648878e-06, |
|
"loss": 0.1058, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.2725712324064538, |
|
"grad_norm": 0.0546875, |
|
"learning_rate": 9.884747835897145e-06, |
|
"loss": 0.1039, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.273257809818057, |
|
"grad_norm": 0.04931640625, |
|
"learning_rate": 9.884003886727044e-06, |
|
"loss": 0.0908, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.27394438722966014, |
|
"grad_norm": 0.046630859375, |
|
"learning_rate": 9.883257575539404e-06, |
|
"loss": 0.0924, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.2746309646412633, |
|
"grad_norm": 0.052490234375, |
|
"learning_rate": 9.88250890273632e-06, |
|
"loss": 0.1073, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.27531754205286646, |
|
"grad_norm": 0.045654296875, |
|
"learning_rate": 9.881757868721166e-06, |
|
"loss": 0.0973, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.2760041194644696, |
|
"grad_norm": 0.050537109375, |
|
"learning_rate": 9.881004473898585e-06, |
|
"loss": 0.0924, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.2766906968760728, |
|
"grad_norm": 0.06298828125, |
|
"learning_rate": 9.880248718674486e-06, |
|
"loss": 0.11, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.27737727428767595, |
|
"grad_norm": 0.052978515625, |
|
"learning_rate": 9.879490603456062e-06, |
|
"loss": 0.093, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.2780638516992791, |
|
"grad_norm": 0.048095703125, |
|
"learning_rate": 9.878730128651768e-06, |
|
"loss": 0.1046, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.27875042911088227, |
|
"grad_norm": 0.050537109375, |
|
"learning_rate": 9.877967294671333e-06, |
|
"loss": 0.1064, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.27943700652248543, |
|
"grad_norm": 0.050537109375, |
|
"learning_rate": 9.87720210192576e-06, |
|
"loss": 0.0954, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.2801235839340886, |
|
"grad_norm": 0.047607421875, |
|
"learning_rate": 9.876434550827315e-06, |
|
"loss": 0.0944, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.2808101613456917, |
|
"grad_norm": 0.05419921875, |
|
"learning_rate": 9.875664641789545e-06, |
|
"loss": 0.1119, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.28149673875729486, |
|
"grad_norm": 0.053955078125, |
|
"learning_rate": 9.874892375227262e-06, |
|
"loss": 0.1031, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.282183316168898, |
|
"grad_norm": 0.0498046875, |
|
"learning_rate": 9.874117751556544e-06, |
|
"loss": 0.0984, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.2828698935805012, |
|
"grad_norm": 0.047607421875, |
|
"learning_rate": 9.873340771194749e-06, |
|
"loss": 0.0978, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.28355647099210435, |
|
"grad_norm": 0.052001953125, |
|
"learning_rate": 9.872561434560493e-06, |
|
"loss": 0.0999, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.2842430484037075, |
|
"grad_norm": 0.04638671875, |
|
"learning_rate": 9.871779742073675e-06, |
|
"loss": 0.0871, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.28492962581531067, |
|
"grad_norm": 0.050048828125, |
|
"learning_rate": 9.870995694155449e-06, |
|
"loss": 0.0974, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.28561620322691383, |
|
"grad_norm": 0.05224609375, |
|
"learning_rate": 9.87020929122825e-06, |
|
"loss": 0.0981, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.286302780638517, |
|
"grad_norm": 0.05712890625, |
|
"learning_rate": 9.869420533715777e-06, |
|
"loss": 0.0807, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.28698935805012016, |
|
"grad_norm": 0.0556640625, |
|
"learning_rate": 9.868629422042994e-06, |
|
"loss": 0.1069, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.2876759354617233, |
|
"grad_norm": 0.06201171875, |
|
"learning_rate": 9.867835956636137e-06, |
|
"loss": 0.101, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.2883625128733265, |
|
"grad_norm": 0.051025390625, |
|
"learning_rate": 9.867040137922712e-06, |
|
"loss": 0.1105, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.28904909028492964, |
|
"grad_norm": 0.049072265625, |
|
"learning_rate": 9.866241966331491e-06, |
|
"loss": 0.1075, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.2897356676965328, |
|
"grad_norm": 0.047607421875, |
|
"learning_rate": 9.865441442292513e-06, |
|
"loss": 0.0941, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.29042224510813597, |
|
"grad_norm": 0.048583984375, |
|
"learning_rate": 9.864638566237084e-06, |
|
"loss": 0.0968, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.2911088225197391, |
|
"grad_norm": 0.050048828125, |
|
"learning_rate": 9.86383333859778e-06, |
|
"loss": 0.0945, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.29179539993134224, |
|
"grad_norm": 0.059326171875, |
|
"learning_rate": 9.863025759808444e-06, |
|
"loss": 0.0993, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.2924819773429454, |
|
"grad_norm": 0.0498046875, |
|
"learning_rate": 9.862215830304178e-06, |
|
"loss": 0.0939, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.29316855475454856, |
|
"grad_norm": 0.053466796875, |
|
"learning_rate": 9.861403550521361e-06, |
|
"loss": 0.1086, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.2938551321661517, |
|
"grad_norm": 0.052978515625, |
|
"learning_rate": 9.860588920897633e-06, |
|
"loss": 0.0952, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.2945417095777549, |
|
"grad_norm": 0.05322265625, |
|
"learning_rate": 9.859771941871903e-06, |
|
"loss": 0.0867, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.29522828698935805, |
|
"grad_norm": 0.056640625, |
|
"learning_rate": 9.858952613884339e-06, |
|
"loss": 0.1108, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.2959148644009612, |
|
"grad_norm": 0.045654296875, |
|
"learning_rate": 9.858130937376384e-06, |
|
"loss": 0.102, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.29660144181256437, |
|
"grad_norm": 0.0517578125, |
|
"learning_rate": 9.857306912790737e-06, |
|
"loss": 0.1038, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.29728801922416753, |
|
"grad_norm": 0.059814453125, |
|
"learning_rate": 9.85648054057137e-06, |
|
"loss": 0.0986, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.2979745966357707, |
|
"grad_norm": 0.05078125, |
|
"learning_rate": 9.855651821163516e-06, |
|
"loss": 0.0851, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.29866117404737386, |
|
"grad_norm": 0.0478515625, |
|
"learning_rate": 9.854820755013672e-06, |
|
"loss": 0.0922, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.299347751458977, |
|
"grad_norm": 0.046875, |
|
"learning_rate": 9.853987342569604e-06, |
|
"loss": 0.0868, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.3000343288705802, |
|
"grad_norm": 0.048583984375, |
|
"learning_rate": 9.853151584280332e-06, |
|
"loss": 0.0863, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.30072090628218334, |
|
"grad_norm": 0.052490234375, |
|
"learning_rate": 9.852313480596155e-06, |
|
"loss": 0.1026, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.30140748369378645, |
|
"grad_norm": 0.044921875, |
|
"learning_rate": 9.851473031968621e-06, |
|
"loss": 0.099, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.3020940611053896, |
|
"grad_norm": 0.0625, |
|
"learning_rate": 9.850630238850549e-06, |
|
"loss": 0.1165, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.3027806385169928, |
|
"grad_norm": 0.05078125, |
|
"learning_rate": 9.849785101696022e-06, |
|
"loss": 0.1013, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.30346721592859593, |
|
"grad_norm": 0.05322265625, |
|
"learning_rate": 9.848937620960382e-06, |
|
"loss": 0.0974, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.3041537933401991, |
|
"grad_norm": 0.059326171875, |
|
"learning_rate": 9.848087797100234e-06, |
|
"loss": 0.0901, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.30484037075180226, |
|
"grad_norm": 0.0556640625, |
|
"learning_rate": 9.84723563057345e-06, |
|
"loss": 0.0938, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.3055269481634054, |
|
"grad_norm": 0.05029296875, |
|
"learning_rate": 9.84638112183916e-06, |
|
"loss": 0.0967, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.3062135255750086, |
|
"grad_norm": 0.055908203125, |
|
"learning_rate": 9.845524271357757e-06, |
|
"loss": 0.0848, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.30690010298661174, |
|
"grad_norm": 0.0478515625, |
|
"learning_rate": 9.844665079590892e-06, |
|
"loss": 0.0881, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.3075866803982149, |
|
"grad_norm": 0.05078125, |
|
"learning_rate": 9.843803547001487e-06, |
|
"loss": 0.0942, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.30827325780981807, |
|
"grad_norm": 0.05419921875, |
|
"learning_rate": 9.842939674053715e-06, |
|
"loss": 0.0922, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.30895983522142123, |
|
"grad_norm": 0.05712890625, |
|
"learning_rate": 9.842073461213017e-06, |
|
"loss": 0.0988, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.3096464126330244, |
|
"grad_norm": 0.05224609375, |
|
"learning_rate": 9.841204908946091e-06, |
|
"loss": 0.0916, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.31033299004462755, |
|
"grad_norm": 0.050537109375, |
|
"learning_rate": 9.840334017720896e-06, |
|
"loss": 0.0971, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.3110195674562307, |
|
"grad_norm": 0.05224609375, |
|
"learning_rate": 9.839460788006652e-06, |
|
"loss": 0.0919, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.3117061448678338, |
|
"grad_norm": 0.053955078125, |
|
"learning_rate": 9.838585220273837e-06, |
|
"loss": 0.0892, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.312392722279437, |
|
"grad_norm": 0.051025390625, |
|
"learning_rate": 9.837707314994192e-06, |
|
"loss": 0.096, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.31307929969104015, |
|
"grad_norm": 0.050537109375, |
|
"learning_rate": 9.836827072640716e-06, |
|
"loss": 0.1013, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.3137658771026433, |
|
"grad_norm": 0.053466796875, |
|
"learning_rate": 9.835944493687665e-06, |
|
"loss": 0.0991, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.31445245451424647, |
|
"grad_norm": 0.049072265625, |
|
"learning_rate": 9.835059578610556e-06, |
|
"loss": 0.106, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.31513903192584963, |
|
"grad_norm": 0.057373046875, |
|
"learning_rate": 9.834172327886166e-06, |
|
"loss": 0.0946, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.3158256093374528, |
|
"grad_norm": 0.0556640625, |
|
"learning_rate": 9.833282741992526e-06, |
|
"loss": 0.1118, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.31651218674905596, |
|
"grad_norm": 0.05712890625, |
|
"learning_rate": 9.832390821408929e-06, |
|
"loss": 0.0994, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.3171987641606591, |
|
"grad_norm": 0.05126953125, |
|
"learning_rate": 9.831496566615927e-06, |
|
"loss": 0.1095, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.3178853415722623, |
|
"grad_norm": 0.054443359375, |
|
"learning_rate": 9.830599978095323e-06, |
|
"loss": 0.098, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.31857191898386544, |
|
"grad_norm": 0.046630859375, |
|
"learning_rate": 9.829701056330188e-06, |
|
"loss": 0.1083, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.3192584963954686, |
|
"grad_norm": 0.051513671875, |
|
"learning_rate": 9.828799801804837e-06, |
|
"loss": 0.0942, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.31994507380707177, |
|
"grad_norm": 0.045654296875, |
|
"learning_rate": 9.827896215004853e-06, |
|
"loss": 0.1035, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.32063165121867493, |
|
"grad_norm": 0.04638671875, |
|
"learning_rate": 9.826990296417071e-06, |
|
"loss": 0.1024, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.3213182286302781, |
|
"grad_norm": 0.0517578125, |
|
"learning_rate": 9.826082046529581e-06, |
|
"loss": 0.103, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.3220048060418812, |
|
"grad_norm": 0.05078125, |
|
"learning_rate": 9.825171465831732e-06, |
|
"loss": 0.0943, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.32269138345348436, |
|
"grad_norm": 0.053955078125, |
|
"learning_rate": 9.824258554814126e-06, |
|
"loss": 0.0973, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.3233779608650875, |
|
"grad_norm": 0.050537109375, |
|
"learning_rate": 9.823343313968624e-06, |
|
"loss": 0.0958, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.3240645382766907, |
|
"grad_norm": 0.053955078125, |
|
"learning_rate": 9.82242574378834e-06, |
|
"loss": 0.0972, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.32475111568829385, |
|
"grad_norm": 0.046142578125, |
|
"learning_rate": 9.821505844767642e-06, |
|
"loss": 0.0943, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.325437693099897, |
|
"grad_norm": 0.056396484375, |
|
"learning_rate": 9.820583617402153e-06, |
|
"loss": 0.0925, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.32612427051150017, |
|
"grad_norm": 0.05322265625, |
|
"learning_rate": 9.819659062188754e-06, |
|
"loss": 0.0903, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.32681084792310333, |
|
"grad_norm": 0.05615234375, |
|
"learning_rate": 9.818732179625578e-06, |
|
"loss": 0.0951, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.3274974253347065, |
|
"grad_norm": 0.041748046875, |
|
"learning_rate": 9.817802970212009e-06, |
|
"loss": 0.0964, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.32818400274630966, |
|
"grad_norm": 0.0625, |
|
"learning_rate": 9.81687143444869e-06, |
|
"loss": 0.1017, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.3288705801579128, |
|
"grad_norm": 0.04931640625, |
|
"learning_rate": 9.815937572837511e-06, |
|
"loss": 0.0909, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.329557157569516, |
|
"grad_norm": 0.052978515625, |
|
"learning_rate": 9.815001385881624e-06, |
|
"loss": 0.0889, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.33024373498111914, |
|
"grad_norm": 0.056884765625, |
|
"learning_rate": 9.814062874085424e-06, |
|
"loss": 0.0918, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.3309303123927223, |
|
"grad_norm": 0.05712890625, |
|
"learning_rate": 9.813122037954567e-06, |
|
"loss": 0.1072, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.33161688980432547, |
|
"grad_norm": 0.052734375, |
|
"learning_rate": 9.812178877995954e-06, |
|
"loss": 0.0905, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.33230346721592857, |
|
"grad_norm": 0.04638671875, |
|
"learning_rate": 9.811233394717742e-06, |
|
"loss": 0.0895, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.33299004462753173, |
|
"grad_norm": 0.050048828125, |
|
"learning_rate": 9.810285588629342e-06, |
|
"loss": 0.1091, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.3336766220391349, |
|
"grad_norm": 0.0517578125, |
|
"learning_rate": 9.809335460241412e-06, |
|
"loss": 0.0977, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.33436319945073806, |
|
"grad_norm": 0.046875, |
|
"learning_rate": 9.808383010065863e-06, |
|
"loss": 0.0955, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.3350497768623412, |
|
"grad_norm": 0.048095703125, |
|
"learning_rate": 9.807428238615858e-06, |
|
"loss": 0.0835, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.3357363542739444, |
|
"grad_norm": 0.050537109375, |
|
"learning_rate": 9.806471146405809e-06, |
|
"loss": 0.0982, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.33642293168554754, |
|
"grad_norm": 0.045166015625, |
|
"learning_rate": 9.805511733951379e-06, |
|
"loss": 0.0884, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.3371095090971507, |
|
"grad_norm": 0.053955078125, |
|
"learning_rate": 9.804550001769478e-06, |
|
"loss": 0.0991, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.33779608650875387, |
|
"grad_norm": 0.052978515625, |
|
"learning_rate": 9.803585950378274e-06, |
|
"loss": 0.0955, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.33848266392035703, |
|
"grad_norm": 0.06494140625, |
|
"learning_rate": 9.802619580297178e-06, |
|
"loss": 0.0919, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.3391692413319602, |
|
"grad_norm": 0.051025390625, |
|
"learning_rate": 9.801650892046851e-06, |
|
"loss": 0.1032, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.33985581874356335, |
|
"grad_norm": 0.048828125, |
|
"learning_rate": 9.800679886149203e-06, |
|
"loss": 0.103, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.3405423961551665, |
|
"grad_norm": 0.0537109375, |
|
"learning_rate": 9.799706563127395e-06, |
|
"loss": 0.098, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.3412289735667697, |
|
"grad_norm": 0.049072265625, |
|
"learning_rate": 9.798730923505833e-06, |
|
"loss": 0.0885, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.3419155509783728, |
|
"grad_norm": 0.051513671875, |
|
"learning_rate": 9.797752967810176e-06, |
|
"loss": 0.1006, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.34260212838997595, |
|
"grad_norm": 0.05419921875, |
|
"learning_rate": 9.796772696567323e-06, |
|
"loss": 0.0914, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.3432887058015791, |
|
"grad_norm": 0.060791015625, |
|
"learning_rate": 9.795790110305431e-06, |
|
"loss": 0.0924, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.34397528321318227, |
|
"grad_norm": 0.049072265625, |
|
"learning_rate": 9.794805209553896e-06, |
|
"loss": 0.0916, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.34466186062478543, |
|
"grad_norm": 0.0498046875, |
|
"learning_rate": 9.793817994843362e-06, |
|
"loss": 0.0906, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.3453484380363886, |
|
"grad_norm": 0.053955078125, |
|
"learning_rate": 9.792828466705725e-06, |
|
"loss": 0.1107, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.34603501544799176, |
|
"grad_norm": 0.052490234375, |
|
"learning_rate": 9.79183662567412e-06, |
|
"loss": 0.091, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.3467215928595949, |
|
"grad_norm": 0.054443359375, |
|
"learning_rate": 9.790842472282935e-06, |
|
"loss": 0.1009, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.3474081702711981, |
|
"grad_norm": 0.047119140625, |
|
"learning_rate": 9.789846007067802e-06, |
|
"loss": 0.0908, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.34809474768280124, |
|
"grad_norm": 0.050048828125, |
|
"learning_rate": 9.788847230565592e-06, |
|
"loss": 0.0912, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.3487813250944044, |
|
"grad_norm": 0.046630859375, |
|
"learning_rate": 9.787846143314433e-06, |
|
"loss": 0.099, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.34946790250600757, |
|
"grad_norm": 0.05322265625, |
|
"learning_rate": 9.786842745853685e-06, |
|
"loss": 0.0913, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.35015447991761073, |
|
"grad_norm": 0.046875, |
|
"learning_rate": 9.785837038723966e-06, |
|
"loss": 0.0976, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.3508410573292139, |
|
"grad_norm": 0.050048828125, |
|
"learning_rate": 9.784829022467128e-06, |
|
"loss": 0.1042, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.35152763474081705, |
|
"grad_norm": 0.05712890625, |
|
"learning_rate": 9.783818697626273e-06, |
|
"loss": 0.0993, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.35221421215242016, |
|
"grad_norm": 0.0439453125, |
|
"learning_rate": 9.782806064745742e-06, |
|
"loss": 0.0862, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.3529007895640233, |
|
"grad_norm": 0.046875, |
|
"learning_rate": 9.781791124371124e-06, |
|
"loss": 0.098, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.3535873669756265, |
|
"grad_norm": 0.05078125, |
|
"learning_rate": 9.78077387704925e-06, |
|
"loss": 0.0924, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.35427394438722964, |
|
"grad_norm": 0.049072265625, |
|
"learning_rate": 9.779754323328192e-06, |
|
"loss": 0.0944, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.3549605217988328, |
|
"grad_norm": 0.051025390625, |
|
"learning_rate": 9.778732463757267e-06, |
|
"loss": 0.0977, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.35564709921043597, |
|
"grad_norm": 0.059326171875, |
|
"learning_rate": 9.777708298887034e-06, |
|
"loss": 0.0957, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.35633367662203913, |
|
"grad_norm": 0.07275390625, |
|
"learning_rate": 9.776681829269291e-06, |
|
"loss": 0.0824, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.3570202540336423, |
|
"grad_norm": 0.050537109375, |
|
"learning_rate": 9.775653055457082e-06, |
|
"loss": 0.0913, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.35770683144524545, |
|
"grad_norm": 0.05078125, |
|
"learning_rate": 9.774621978004692e-06, |
|
"loss": 0.0943, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.3583934088568486, |
|
"grad_norm": 0.055419921875, |
|
"learning_rate": 9.773588597467642e-06, |
|
"loss": 0.0938, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.3590799862684518, |
|
"grad_norm": 0.04833984375, |
|
"learning_rate": 9.772552914402701e-06, |
|
"loss": 0.0934, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.35976656368005494, |
|
"grad_norm": 0.057373046875, |
|
"learning_rate": 9.771514929367875e-06, |
|
"loss": 0.1038, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.3604531410916581, |
|
"grad_norm": 0.05615234375, |
|
"learning_rate": 9.77047464292241e-06, |
|
"loss": 0.0952, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.36113971850326126, |
|
"grad_norm": 0.048828125, |
|
"learning_rate": 9.76943205562679e-06, |
|
"loss": 0.0932, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.3618262959148644, |
|
"grad_norm": 0.045166015625, |
|
"learning_rate": 9.768387168042745e-06, |
|
"loss": 0.0898, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.36251287332646753, |
|
"grad_norm": 0.055908203125, |
|
"learning_rate": 9.76733998073324e-06, |
|
"loss": 0.1001, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.3631994507380707, |
|
"grad_norm": 0.045654296875, |
|
"learning_rate": 9.766290494262477e-06, |
|
"loss": 0.0942, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.36388602814967386, |
|
"grad_norm": 0.0634765625, |
|
"learning_rate": 9.7652387091959e-06, |
|
"loss": 0.1166, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.364572605561277, |
|
"grad_norm": 0.0546875, |
|
"learning_rate": 9.764184626100193e-06, |
|
"loss": 0.0933, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.3652591829728802, |
|
"grad_norm": 0.06201171875, |
|
"learning_rate": 9.763128245543272e-06, |
|
"loss": 0.0961, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.36594576038448334, |
|
"grad_norm": 0.04638671875, |
|
"learning_rate": 9.7620695680943e-06, |
|
"loss": 0.0964, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.3666323377960865, |
|
"grad_norm": 0.04833984375, |
|
"learning_rate": 9.761008594323666e-06, |
|
"loss": 0.0975, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.36731891520768967, |
|
"grad_norm": 0.050537109375, |
|
"learning_rate": 9.759945324803006e-06, |
|
"loss": 0.0951, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.36800549261929283, |
|
"grad_norm": 0.05029296875, |
|
"learning_rate": 9.75887976010519e-06, |
|
"loss": 0.0882, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.368692070030896, |
|
"grad_norm": 0.0634765625, |
|
"learning_rate": 9.75781190080432e-06, |
|
"loss": 0.1036, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.36937864744249915, |
|
"grad_norm": 0.04638671875, |
|
"learning_rate": 9.756741747475744e-06, |
|
"loss": 0.0964, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.3700652248541023, |
|
"grad_norm": 0.048095703125, |
|
"learning_rate": 9.755669300696035e-06, |
|
"loss": 0.1018, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.3707518022657055, |
|
"grad_norm": 0.04833984375, |
|
"learning_rate": 9.75459456104301e-06, |
|
"loss": 0.0919, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.37143837967730864, |
|
"grad_norm": 0.043701171875, |
|
"learning_rate": 9.753517529095716e-06, |
|
"loss": 0.0999, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.3721249570889118, |
|
"grad_norm": 0.046875, |
|
"learning_rate": 9.752438205434439e-06, |
|
"loss": 0.0877, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.3728115345005149, |
|
"grad_norm": 0.048828125, |
|
"learning_rate": 9.751356590640696e-06, |
|
"loss": 0.0926, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.37349811191211807, |
|
"grad_norm": 0.04931640625, |
|
"learning_rate": 9.750272685297241e-06, |
|
"loss": 0.0983, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.37418468932372123, |
|
"grad_norm": 0.052734375, |
|
"learning_rate": 9.749186489988065e-06, |
|
"loss": 0.0966, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.3748712667353244, |
|
"grad_norm": 0.04736328125, |
|
"learning_rate": 9.748098005298384e-06, |
|
"loss": 0.1003, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.37555784414692756, |
|
"grad_norm": 0.05029296875, |
|
"learning_rate": 9.747007231814656e-06, |
|
"loss": 0.0979, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.3762444215585307, |
|
"grad_norm": 0.061767578125, |
|
"learning_rate": 9.745914170124568e-06, |
|
"loss": 0.1027, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.3769309989701339, |
|
"grad_norm": 0.045166015625, |
|
"learning_rate": 9.74481882081704e-06, |
|
"loss": 0.0885, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.37761757638173704, |
|
"grad_norm": 0.06494140625, |
|
"learning_rate": 9.743721184482226e-06, |
|
"loss": 0.0991, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.3783041537933402, |
|
"grad_norm": 0.047607421875, |
|
"learning_rate": 9.742621261711512e-06, |
|
"loss": 0.092, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.37899073120494337, |
|
"grad_norm": 0.048828125, |
|
"learning_rate": 9.741519053097516e-06, |
|
"loss": 0.1007, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.3796773086165465, |
|
"grad_norm": 0.049560546875, |
|
"learning_rate": 9.740414559234085e-06, |
|
"loss": 0.0954, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.3803638860281497, |
|
"grad_norm": 0.05517578125, |
|
"learning_rate": 9.739307780716301e-06, |
|
"loss": 0.1059, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.38105046343975285, |
|
"grad_norm": 0.049072265625, |
|
"learning_rate": 9.738198718140471e-06, |
|
"loss": 0.0927, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.381737040851356, |
|
"grad_norm": 0.057373046875, |
|
"learning_rate": 9.737087372104143e-06, |
|
"loss": 0.0929, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.3824236182629592, |
|
"grad_norm": 0.046875, |
|
"learning_rate": 9.735973743206085e-06, |
|
"loss": 0.1015, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.3831101956745623, |
|
"grad_norm": 0.05517578125, |
|
"learning_rate": 9.7348578320463e-06, |
|
"loss": 0.0959, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.38379677308616544, |
|
"grad_norm": 0.056640625, |
|
"learning_rate": 9.73373963922602e-06, |
|
"loss": 0.1032, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.3844833504977686, |
|
"grad_norm": 0.0615234375, |
|
"learning_rate": 9.732619165347705e-06, |
|
"loss": 0.0872, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.38516992790937177, |
|
"grad_norm": 0.057373046875, |
|
"learning_rate": 9.731496411015046e-06, |
|
"loss": 0.0942, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.38585650532097493, |
|
"grad_norm": 0.0654296875, |
|
"learning_rate": 9.73037137683296e-06, |
|
"loss": 0.1003, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.3865430827325781, |
|
"grad_norm": 0.04931640625, |
|
"learning_rate": 9.729244063407594e-06, |
|
"loss": 0.094, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.38722966014418125, |
|
"grad_norm": 0.05029296875, |
|
"learning_rate": 9.728114471346324e-06, |
|
"loss": 0.0935, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.3879162375557844, |
|
"grad_norm": 0.0546875, |
|
"learning_rate": 9.726982601257755e-06, |
|
"loss": 0.1077, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.3886028149673876, |
|
"grad_norm": 0.05517578125, |
|
"learning_rate": 9.725848453751712e-06, |
|
"loss": 0.1007, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.38928939237899074, |
|
"grad_norm": 0.052978515625, |
|
"learning_rate": 9.724712029439255e-06, |
|
"loss": 0.0951, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.3899759697905939, |
|
"grad_norm": 0.055419921875, |
|
"learning_rate": 9.723573328932669e-06, |
|
"loss": 0.0993, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.39066254720219706, |
|
"grad_norm": 0.051025390625, |
|
"learning_rate": 9.722432352845458e-06, |
|
"loss": 0.096, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.3913491246138002, |
|
"grad_norm": 0.04638671875, |
|
"learning_rate": 9.721289101792367e-06, |
|
"loss": 0.0948, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.3920357020254034, |
|
"grad_norm": 0.050537109375, |
|
"learning_rate": 9.72014357638935e-06, |
|
"loss": 0.0967, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.39272227943700655, |
|
"grad_norm": 0.052978515625, |
|
"learning_rate": 9.718995777253598e-06, |
|
"loss": 0.0923, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.39340885684860966, |
|
"grad_norm": 0.044189453125, |
|
"learning_rate": 9.717845705003523e-06, |
|
"loss": 0.0938, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.3940954342602128, |
|
"grad_norm": 0.049072265625, |
|
"learning_rate": 9.716693360258761e-06, |
|
"loss": 0.0976, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.394782011671816, |
|
"grad_norm": 0.05078125, |
|
"learning_rate": 9.715538743640177e-06, |
|
"loss": 0.0969, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.39546858908341914, |
|
"grad_norm": 0.05322265625, |
|
"learning_rate": 9.71438185576985e-06, |
|
"loss": 0.0894, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.3961551664950223, |
|
"grad_norm": 0.04736328125, |
|
"learning_rate": 9.71322269727109e-06, |
|
"loss": 0.1066, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.39684174390662547, |
|
"grad_norm": 0.0556640625, |
|
"learning_rate": 9.712061268768436e-06, |
|
"loss": 0.0931, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.39752832131822863, |
|
"grad_norm": 0.06298828125, |
|
"learning_rate": 9.710897570887639e-06, |
|
"loss": 0.1004, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.3982148987298318, |
|
"grad_norm": 0.0517578125, |
|
"learning_rate": 9.709731604255675e-06, |
|
"loss": 0.0894, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.39890147614143495, |
|
"grad_norm": 0.052978515625, |
|
"learning_rate": 9.70856336950075e-06, |
|
"loss": 0.0988, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.3995880535530381, |
|
"grad_norm": 0.052490234375, |
|
"learning_rate": 9.707392867252282e-06, |
|
"loss": 0.1024, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.4002746309646413, |
|
"grad_norm": 0.05029296875, |
|
"learning_rate": 9.706220098140917e-06, |
|
"loss": 0.0846, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.40096120837624444, |
|
"grad_norm": 0.047607421875, |
|
"learning_rate": 9.705045062798519e-06, |
|
"loss": 0.1155, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.4016477857878476, |
|
"grad_norm": 0.05859375, |
|
"learning_rate": 9.703867761858177e-06, |
|
"loss": 0.1043, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.40233436319945076, |
|
"grad_norm": 0.0537109375, |
|
"learning_rate": 9.702688195954198e-06, |
|
"loss": 0.0926, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.4030209406110539, |
|
"grad_norm": 0.06005859375, |
|
"learning_rate": 9.70150636572211e-06, |
|
"loss": 0.0909, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.40370751802265703, |
|
"grad_norm": 0.05078125, |
|
"learning_rate": 9.700322271798657e-06, |
|
"loss": 0.1098, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.4043940954342602, |
|
"grad_norm": 0.055419921875, |
|
"learning_rate": 9.69913591482181e-06, |
|
"loss": 0.0972, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.40508067284586335, |
|
"grad_norm": 0.048583984375, |
|
"learning_rate": 9.697947295430754e-06, |
|
"loss": 0.0873, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.4057672502574665, |
|
"grad_norm": 0.051025390625, |
|
"learning_rate": 9.696756414265894e-06, |
|
"loss": 0.0976, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.4064538276690697, |
|
"grad_norm": 0.057373046875, |
|
"learning_rate": 9.695563271968853e-06, |
|
"loss": 0.1141, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.40714040508067284, |
|
"grad_norm": 0.04736328125, |
|
"learning_rate": 9.694367869182479e-06, |
|
"loss": 0.0915, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.407826982492276, |
|
"grad_norm": 0.06005859375, |
|
"learning_rate": 9.693170206550824e-06, |
|
"loss": 0.0912, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.40851355990387916, |
|
"grad_norm": 0.046875, |
|
"learning_rate": 9.69197028471917e-06, |
|
"loss": 0.0946, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.4092001373154823, |
|
"grad_norm": 0.05615234375, |
|
"learning_rate": 9.690768104334015e-06, |
|
"loss": 0.1031, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.4098867147270855, |
|
"grad_norm": 0.053955078125, |
|
"learning_rate": 9.689563666043065e-06, |
|
"loss": 0.1008, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.41057329213868865, |
|
"grad_norm": 0.046875, |
|
"learning_rate": 9.688356970495252e-06, |
|
"loss": 0.1021, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.4112598695502918, |
|
"grad_norm": 0.054931640625, |
|
"learning_rate": 9.687148018340717e-06, |
|
"loss": 0.0992, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.411946446961895, |
|
"grad_norm": 0.04931640625, |
|
"learning_rate": 9.685936810230824e-06, |
|
"loss": 0.096, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.41263302437349814, |
|
"grad_norm": 0.05126953125, |
|
"learning_rate": 9.684723346818149e-06, |
|
"loss": 0.1063, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.41331960178510124, |
|
"grad_norm": 0.050537109375, |
|
"learning_rate": 9.683507628756477e-06, |
|
"loss": 0.0882, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.4140061791967044, |
|
"grad_norm": 0.05078125, |
|
"learning_rate": 9.682289656700823e-06, |
|
"loss": 0.0973, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.41469275660830757, |
|
"grad_norm": 0.04833984375, |
|
"learning_rate": 9.6810694313074e-06, |
|
"loss": 0.0842, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.41537933401991073, |
|
"grad_norm": 0.051025390625, |
|
"learning_rate": 9.679846953233644e-06, |
|
"loss": 0.0895, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.4160659114315139, |
|
"grad_norm": 0.052734375, |
|
"learning_rate": 9.678622223138203e-06, |
|
"loss": 0.1089, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.41675248884311705, |
|
"grad_norm": 0.048583984375, |
|
"learning_rate": 9.677395241680939e-06, |
|
"loss": 0.0863, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.4174390662547202, |
|
"grad_norm": 0.058349609375, |
|
"learning_rate": 9.676166009522925e-06, |
|
"loss": 0.0873, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.4181256436663234, |
|
"grad_norm": 0.048583984375, |
|
"learning_rate": 9.674934527326447e-06, |
|
"loss": 0.0956, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.41881222107792654, |
|
"grad_norm": 0.0537109375, |
|
"learning_rate": 9.673700795755008e-06, |
|
"loss": 0.1002, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.4194987984895297, |
|
"grad_norm": 0.054443359375, |
|
"learning_rate": 9.672464815473315e-06, |
|
"loss": 0.1013, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.42018537590113286, |
|
"grad_norm": 0.054931640625, |
|
"learning_rate": 9.67122658714729e-06, |
|
"loss": 0.105, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.420871953312736, |
|
"grad_norm": 0.054443359375, |
|
"learning_rate": 9.669986111444071e-06, |
|
"loss": 0.0911, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.4215585307243392, |
|
"grad_norm": 0.056640625, |
|
"learning_rate": 9.668743389032001e-06, |
|
"loss": 0.1001, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.42224510813594235, |
|
"grad_norm": 0.0517578125, |
|
"learning_rate": 9.667498420580632e-06, |
|
"loss": 0.0878, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.4229316855475455, |
|
"grad_norm": 0.046142578125, |
|
"learning_rate": 9.666251206760732e-06, |
|
"loss": 0.0914, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.4236182629591486, |
|
"grad_norm": 0.0517578125, |
|
"learning_rate": 9.665001748244276e-06, |
|
"loss": 0.1006, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.4243048403707518, |
|
"grad_norm": 0.052734375, |
|
"learning_rate": 9.663750045704448e-06, |
|
"loss": 0.1053, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.42499141778235494, |
|
"grad_norm": 0.05224609375, |
|
"learning_rate": 9.66249609981564e-06, |
|
"loss": 0.1015, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.4256779951939581, |
|
"grad_norm": 0.056640625, |
|
"learning_rate": 9.661239911253457e-06, |
|
"loss": 0.0913, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.42636457260556127, |
|
"grad_norm": 0.04443359375, |
|
"learning_rate": 9.659981480694708e-06, |
|
"loss": 0.0923, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.4270511500171644, |
|
"grad_norm": 0.050048828125, |
|
"learning_rate": 9.65872080881741e-06, |
|
"loss": 0.1104, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.4277377274287676, |
|
"grad_norm": 0.0556640625, |
|
"learning_rate": 9.65745789630079e-06, |
|
"loss": 0.0925, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.42842430484037075, |
|
"grad_norm": 0.056640625, |
|
"learning_rate": 9.656192743825283e-06, |
|
"loss": 0.084, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.4291108822519739, |
|
"grad_norm": 0.064453125, |
|
"learning_rate": 9.654925352072526e-06, |
|
"loss": 0.106, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.4297974596635771, |
|
"grad_norm": 0.05322265625, |
|
"learning_rate": 9.653655721725367e-06, |
|
"loss": 0.0945, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.43048403707518024, |
|
"grad_norm": 0.05224609375, |
|
"learning_rate": 9.652383853467858e-06, |
|
"loss": 0.1017, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.4311706144867834, |
|
"grad_norm": 0.05712890625, |
|
"learning_rate": 9.651109747985257e-06, |
|
"loss": 0.0931, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.43185719189838656, |
|
"grad_norm": 0.053955078125, |
|
"learning_rate": 9.649833405964029e-06, |
|
"loss": 0.0952, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.4325437693099897, |
|
"grad_norm": 0.0556640625, |
|
"learning_rate": 9.64855482809184e-06, |
|
"loss": 0.0954, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.4332303467215929, |
|
"grad_norm": 0.048828125, |
|
"learning_rate": 9.647274015057568e-06, |
|
"loss": 0.0936, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.433916924133196, |
|
"grad_norm": 0.048828125, |
|
"learning_rate": 9.645990967551287e-06, |
|
"loss": 0.0965, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.43460350154479915, |
|
"grad_norm": 0.051513671875, |
|
"learning_rate": 9.64470568626428e-06, |
|
"loss": 0.1029, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.4352900789564023, |
|
"grad_norm": 0.050048828125, |
|
"learning_rate": 9.64341817188903e-06, |
|
"loss": 0.103, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.4359766563680055, |
|
"grad_norm": 0.05615234375, |
|
"learning_rate": 9.642128425119226e-06, |
|
"loss": 0.0952, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.43666323377960864, |
|
"grad_norm": 0.049560546875, |
|
"learning_rate": 9.640836446649761e-06, |
|
"loss": 0.09, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.4373498111912118, |
|
"grad_norm": 0.05322265625, |
|
"learning_rate": 9.639542237176726e-06, |
|
"loss": 0.0992, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.43803638860281496, |
|
"grad_norm": 0.057861328125, |
|
"learning_rate": 9.638245797397418e-06, |
|
"loss": 0.1053, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.4387229660144181, |
|
"grad_norm": 0.05615234375, |
|
"learning_rate": 9.636947128010332e-06, |
|
"loss": 0.0963, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.4394095434260213, |
|
"grad_norm": 0.0615234375, |
|
"learning_rate": 9.635646229715168e-06, |
|
"loss": 0.0979, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.44009612083762445, |
|
"grad_norm": 0.05224609375, |
|
"learning_rate": 9.634343103212824e-06, |
|
"loss": 0.0891, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.4407826982492276, |
|
"grad_norm": 0.05712890625, |
|
"learning_rate": 9.6330377492054e-06, |
|
"loss": 0.0994, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.4414692756608308, |
|
"grad_norm": 0.0517578125, |
|
"learning_rate": 9.631730168396196e-06, |
|
"loss": 0.0962, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.44215585307243394, |
|
"grad_norm": 0.060302734375, |
|
"learning_rate": 9.630420361489711e-06, |
|
"loss": 0.0996, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.4428424304840371, |
|
"grad_norm": 0.053955078125, |
|
"learning_rate": 9.629108329191646e-06, |
|
"loss": 0.0907, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.44352900789564026, |
|
"grad_norm": 0.05029296875, |
|
"learning_rate": 9.627794072208896e-06, |
|
"loss": 0.0972, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.44421558530724337, |
|
"grad_norm": 0.057373046875, |
|
"learning_rate": 9.62647759124956e-06, |
|
"loss": 0.0972, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.44490216271884653, |
|
"grad_norm": 0.05224609375, |
|
"learning_rate": 9.62515888702293e-06, |
|
"loss": 0.0909, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.4455887401304497, |
|
"grad_norm": 0.05615234375, |
|
"learning_rate": 9.6238379602395e-06, |
|
"loss": 0.0956, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.44627531754205285, |
|
"grad_norm": 0.064453125, |
|
"learning_rate": 9.622514811610964e-06, |
|
"loss": 0.103, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.446961894953656, |
|
"grad_norm": 0.0556640625, |
|
"learning_rate": 9.621189441850206e-06, |
|
"loss": 0.0925, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.4476484723652592, |
|
"grad_norm": 0.046142578125, |
|
"learning_rate": 9.619861851671306e-06, |
|
"loss": 0.0864, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.44833504977686234, |
|
"grad_norm": 0.052490234375, |
|
"learning_rate": 9.618532041789548e-06, |
|
"loss": 0.1086, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.4490216271884655, |
|
"grad_norm": 0.04931640625, |
|
"learning_rate": 9.61720001292141e-06, |
|
"loss": 0.0875, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.44970820460006866, |
|
"grad_norm": 0.0595703125, |
|
"learning_rate": 9.61586576578456e-06, |
|
"loss": 0.108, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.4503947820116718, |
|
"grad_norm": 0.052978515625, |
|
"learning_rate": 9.614529301097867e-06, |
|
"loss": 0.097, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.451081359423275, |
|
"grad_norm": 0.050537109375, |
|
"learning_rate": 9.613190619581393e-06, |
|
"loss": 0.091, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.45176793683487815, |
|
"grad_norm": 0.055419921875, |
|
"learning_rate": 9.611849721956392e-06, |
|
"loss": 0.092, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.4524545142464813, |
|
"grad_norm": 0.05908203125, |
|
"learning_rate": 9.610506608945315e-06, |
|
"loss": 0.093, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.45314109165808447, |
|
"grad_norm": 0.050048828125, |
|
"learning_rate": 9.609161281271808e-06, |
|
"loss": 0.1124, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.45382766906968763, |
|
"grad_norm": 0.052490234375, |
|
"learning_rate": 9.607813739660705e-06, |
|
"loss": 0.0958, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.45451424648129074, |
|
"grad_norm": 0.05126953125, |
|
"learning_rate": 9.606463984838034e-06, |
|
"loss": 0.0844, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.4552008238928939, |
|
"grad_norm": 0.050048828125, |
|
"learning_rate": 9.605112017531022e-06, |
|
"loss": 0.0888, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.45588740130449706, |
|
"grad_norm": 0.056396484375, |
|
"learning_rate": 9.603757838468079e-06, |
|
"loss": 0.0944, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.4565739787161002, |
|
"grad_norm": 0.05078125, |
|
"learning_rate": 9.602401448378816e-06, |
|
"loss": 0.1006, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.4572605561277034, |
|
"grad_norm": 0.05712890625, |
|
"learning_rate": 9.601042847994021e-06, |
|
"loss": 0.0997, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.45794713353930655, |
|
"grad_norm": 0.058837890625, |
|
"learning_rate": 9.59968203804569e-06, |
|
"loss": 0.0903, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.4586337109509097, |
|
"grad_norm": 0.056640625, |
|
"learning_rate": 9.598319019267e-06, |
|
"loss": 0.0885, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.4593202883625129, |
|
"grad_norm": 0.051513671875, |
|
"learning_rate": 9.596953792392318e-06, |
|
"loss": 0.1093, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.46000686577411604, |
|
"grad_norm": 0.05908203125, |
|
"learning_rate": 9.595586358157202e-06, |
|
"loss": 0.1034, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.4606934431857192, |
|
"grad_norm": 0.059326171875, |
|
"learning_rate": 9.5942167172984e-06, |
|
"loss": 0.0948, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.46138002059732236, |
|
"grad_norm": 0.055419921875, |
|
"learning_rate": 9.592844870553849e-06, |
|
"loss": 0.0891, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.4620665980089255, |
|
"grad_norm": 0.0546875, |
|
"learning_rate": 9.591470818662672e-06, |
|
"loss": 0.0923, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 0.4627531754205287, |
|
"grad_norm": 0.0556640625, |
|
"learning_rate": 9.590094562365184e-06, |
|
"loss": 0.0913, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.46343975283213185, |
|
"grad_norm": 0.04833984375, |
|
"learning_rate": 9.588716102402882e-06, |
|
"loss": 0.0977, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.464126330243735, |
|
"grad_norm": 0.057861328125, |
|
"learning_rate": 9.58733543951846e-06, |
|
"loss": 0.0921, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.4648129076553381, |
|
"grad_norm": 0.052734375, |
|
"learning_rate": 9.58595257445579e-06, |
|
"loss": 0.0969, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 0.4654994850669413, |
|
"grad_norm": 0.0537109375, |
|
"learning_rate": 9.584567507959929e-06, |
|
"loss": 0.0944, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.46618606247854444, |
|
"grad_norm": 0.053955078125, |
|
"learning_rate": 9.583180240777128e-06, |
|
"loss": 0.0925, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 0.4668726398901476, |
|
"grad_norm": 0.05517578125, |
|
"learning_rate": 9.581790773654821e-06, |
|
"loss": 0.0953, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.46755921730175076, |
|
"grad_norm": 0.0458984375, |
|
"learning_rate": 9.580399107341627e-06, |
|
"loss": 0.0962, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 0.4682457947133539, |
|
"grad_norm": 0.0458984375, |
|
"learning_rate": 9.579005242587344e-06, |
|
"loss": 0.0924, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.4689323721249571, |
|
"grad_norm": 0.056396484375, |
|
"learning_rate": 9.577609180142967e-06, |
|
"loss": 0.091, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 0.46961894953656025, |
|
"grad_norm": 0.053955078125, |
|
"learning_rate": 9.576210920760662e-06, |
|
"loss": 0.0871, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.4703055269481634, |
|
"grad_norm": 0.055419921875, |
|
"learning_rate": 9.574810465193787e-06, |
|
"loss": 0.0861, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.4709921043597666, |
|
"grad_norm": 0.055908203125, |
|
"learning_rate": 9.57340781419688e-06, |
|
"loss": 0.0872, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.47167868177136973, |
|
"grad_norm": 0.05078125, |
|
"learning_rate": 9.572002968525662e-06, |
|
"loss": 0.0869, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 0.4723652591829729, |
|
"grad_norm": 0.052978515625, |
|
"learning_rate": 9.57059592893704e-06, |
|
"loss": 0.0853, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.47305183659457606, |
|
"grad_norm": 0.053955078125, |
|
"learning_rate": 9.569186696189095e-06, |
|
"loss": 0.1048, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 0.4737384140061792, |
|
"grad_norm": 0.0546875, |
|
"learning_rate": 9.567775271041099e-06, |
|
"loss": 0.101, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.4744249914177824, |
|
"grad_norm": 0.053955078125, |
|
"learning_rate": 9.566361654253499e-06, |
|
"loss": 0.0919, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 0.4751115688293855, |
|
"grad_norm": 0.05615234375, |
|
"learning_rate": 9.564945846587925e-06, |
|
"loss": 0.0973, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.47579814624098865, |
|
"grad_norm": 0.050048828125, |
|
"learning_rate": 9.563527848807186e-06, |
|
"loss": 0.0926, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 0.4764847236525918, |
|
"grad_norm": 0.05615234375, |
|
"learning_rate": 9.562107661675276e-06, |
|
"loss": 0.0939, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 0.477171301064195, |
|
"grad_norm": 0.046875, |
|
"learning_rate": 9.560685285957361e-06, |
|
"loss": 0.0936, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.47785787847579814, |
|
"grad_norm": 0.05224609375, |
|
"learning_rate": 9.55926072241979e-06, |
|
"loss": 0.0903, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.4785444558874013, |
|
"grad_norm": 0.052001953125, |
|
"learning_rate": 9.557833971830093e-06, |
|
"loss": 0.0903, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 0.47923103329900446, |
|
"grad_norm": 0.052978515625, |
|
"learning_rate": 9.556405034956974e-06, |
|
"loss": 0.089, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 0.4799176107106076, |
|
"grad_norm": 0.053955078125, |
|
"learning_rate": 9.554973912570316e-06, |
|
"loss": 0.098, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 0.4806041881222108, |
|
"grad_norm": 0.054443359375, |
|
"learning_rate": 9.553540605441182e-06, |
|
"loss": 0.0954, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.48129076553381395, |
|
"grad_norm": 0.053466796875, |
|
"learning_rate": 9.552105114341811e-06, |
|
"loss": 0.0783, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 0.4819773429454171, |
|
"grad_norm": 0.0634765625, |
|
"learning_rate": 9.550667440045618e-06, |
|
"loss": 0.0872, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 0.48266392035702027, |
|
"grad_norm": 0.0498046875, |
|
"learning_rate": 9.549227583327193e-06, |
|
"loss": 0.0912, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 0.48335049776862343, |
|
"grad_norm": 0.05859375, |
|
"learning_rate": 9.547785544962303e-06, |
|
"loss": 0.0978, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.4840370751802266, |
|
"grad_norm": 0.04736328125, |
|
"learning_rate": 9.546341325727893e-06, |
|
"loss": 0.0913, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.4847236525918297, |
|
"grad_norm": 0.05419921875, |
|
"learning_rate": 9.54489492640208e-06, |
|
"loss": 0.0833, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 0.48541023000343286, |
|
"grad_norm": 0.046630859375, |
|
"learning_rate": 9.543446347764159e-06, |
|
"loss": 0.086, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 0.486096807415036, |
|
"grad_norm": 0.05126953125, |
|
"learning_rate": 9.541995590594589e-06, |
|
"loss": 0.0908, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.4867833848266392, |
|
"grad_norm": 0.061279296875, |
|
"learning_rate": 9.540542655675014e-06, |
|
"loss": 0.1035, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 0.48746996223824235, |
|
"grad_norm": 0.056884765625, |
|
"learning_rate": 9.539087543788251e-06, |
|
"loss": 0.1008, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.4881565396498455, |
|
"grad_norm": 0.05078125, |
|
"learning_rate": 9.537630255718285e-06, |
|
"loss": 0.0905, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 0.4888431170614487, |
|
"grad_norm": 0.057861328125, |
|
"learning_rate": 9.53617079225027e-06, |
|
"loss": 0.1008, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.48952969447305184, |
|
"grad_norm": 0.047607421875, |
|
"learning_rate": 9.534709154170542e-06, |
|
"loss": 0.0945, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 0.490216271884655, |
|
"grad_norm": 0.051025390625, |
|
"learning_rate": 9.533245342266604e-06, |
|
"loss": 0.0926, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 0.49090284929625816, |
|
"grad_norm": 0.0478515625, |
|
"learning_rate": 9.531779357327125e-06, |
|
"loss": 0.0938, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.4915894267078613, |
|
"grad_norm": 0.0595703125, |
|
"learning_rate": 9.530311200141957e-06, |
|
"loss": 0.0921, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.4922760041194645, |
|
"grad_norm": 0.052001953125, |
|
"learning_rate": 9.528840871502108e-06, |
|
"loss": 0.0971, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 0.49296258153106765, |
|
"grad_norm": 0.052978515625, |
|
"learning_rate": 9.527368372199767e-06, |
|
"loss": 0.0921, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 0.4936491589426708, |
|
"grad_norm": 0.056884765625, |
|
"learning_rate": 9.525893703028289e-06, |
|
"loss": 0.0892, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 0.49433573635427397, |
|
"grad_norm": 0.055908203125, |
|
"learning_rate": 9.524416864782196e-06, |
|
"loss": 0.0887, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.4950223137658771, |
|
"grad_norm": 0.048583984375, |
|
"learning_rate": 9.522937858257177e-06, |
|
"loss": 0.0823, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 0.49570889117748024, |
|
"grad_norm": 0.051513671875, |
|
"learning_rate": 9.5214566842501e-06, |
|
"loss": 0.0999, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 0.4963954685890834, |
|
"grad_norm": 0.052001953125, |
|
"learning_rate": 9.519973343558984e-06, |
|
"loss": 0.0871, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 0.49708204600068656, |
|
"grad_norm": 0.0537109375, |
|
"learning_rate": 9.518487836983035e-06, |
|
"loss": 0.1034, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.4977686234122897, |
|
"grad_norm": 0.060791015625, |
|
"learning_rate": 9.517000165322607e-06, |
|
"loss": 0.0851, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.4984552008238929, |
|
"grad_norm": 0.062255859375, |
|
"learning_rate": 9.515510329379234e-06, |
|
"loss": 0.0945, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 0.49914177823549605, |
|
"grad_norm": 0.05224609375, |
|
"learning_rate": 9.514018329955608e-06, |
|
"loss": 0.0937, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 0.4998283556470992, |
|
"grad_norm": 0.053466796875, |
|
"learning_rate": 9.51252416785559e-06, |
|
"loss": 0.0926, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.5005149330587023, |
|
"grad_norm": 0.045654296875, |
|
"learning_rate": 9.51102784388421e-06, |
|
"loss": 0.1019, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 0.5012015104703055, |
|
"grad_norm": 0.052734375, |
|
"learning_rate": 9.509529358847655e-06, |
|
"loss": 0.1007, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.5018880878819086, |
|
"grad_norm": 0.053466796875, |
|
"learning_rate": 9.508028713553282e-06, |
|
"loss": 0.1017, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.5025746652935118, |
|
"grad_norm": 0.051025390625, |
|
"learning_rate": 9.50652590880961e-06, |
|
"loss": 0.0902, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.503261242705115, |
|
"grad_norm": 0.048828125, |
|
"learning_rate": 9.505020945426318e-06, |
|
"loss": 0.0859, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 0.5039478201167181, |
|
"grad_norm": 0.0439453125, |
|
"learning_rate": 9.503513824214254e-06, |
|
"loss": 0.0862, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 0.5046343975283213, |
|
"grad_norm": 0.048095703125, |
|
"learning_rate": 9.502004545985428e-06, |
|
"loss": 0.096, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.5053209749399244, |
|
"grad_norm": 0.06005859375, |
|
"learning_rate": 9.500493111553007e-06, |
|
"loss": 0.1036, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.5060075523515276, |
|
"grad_norm": 0.049560546875, |
|
"learning_rate": 9.498979521731327e-06, |
|
"loss": 0.093, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 0.5066941297631308, |
|
"grad_norm": 0.0615234375, |
|
"learning_rate": 9.497463777335875e-06, |
|
"loss": 0.1004, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 0.5073807071747339, |
|
"grad_norm": 0.07666015625, |
|
"learning_rate": 9.495945879183312e-06, |
|
"loss": 0.0959, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 0.5080672845863371, |
|
"grad_norm": 0.049072265625, |
|
"learning_rate": 9.49442582809145e-06, |
|
"loss": 0.0913, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.5087538619979403, |
|
"grad_norm": 0.06494140625, |
|
"learning_rate": 9.49290362487926e-06, |
|
"loss": 0.1015, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 0.5094404394095434, |
|
"grad_norm": 0.05322265625, |
|
"learning_rate": 9.49137927036688e-06, |
|
"loss": 0.103, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 0.5101270168211466, |
|
"grad_norm": 0.0625, |
|
"learning_rate": 9.489852765375602e-06, |
|
"loss": 0.0989, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 0.5108135942327497, |
|
"grad_norm": 0.059326171875, |
|
"learning_rate": 9.488324110727878e-06, |
|
"loss": 0.0826, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.5115001716443529, |
|
"grad_norm": 0.0732421875, |
|
"learning_rate": 9.486793307247318e-06, |
|
"loss": 0.0959, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.5121867490559561, |
|
"grad_norm": 0.0546875, |
|
"learning_rate": 9.48526035575869e-06, |
|
"loss": 0.0856, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 0.5128733264675592, |
|
"grad_norm": 0.05908203125, |
|
"learning_rate": 9.483725257087919e-06, |
|
"loss": 0.0915, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 0.5135599038791624, |
|
"grad_norm": 0.051513671875, |
|
"learning_rate": 9.482188012062084e-06, |
|
"loss": 0.0828, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.5142464812907656, |
|
"grad_norm": 0.0634765625, |
|
"learning_rate": 9.480648621509426e-06, |
|
"loss": 0.0934, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 0.5149330587023687, |
|
"grad_norm": 0.05322265625, |
|
"learning_rate": 9.47910708625934e-06, |
|
"loss": 0.0966, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.5156196361139719, |
|
"grad_norm": 0.05615234375, |
|
"learning_rate": 9.477563407142372e-06, |
|
"loss": 0.0973, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 0.516306213525575, |
|
"grad_norm": 0.049072265625, |
|
"learning_rate": 9.476017584990229e-06, |
|
"loss": 0.0875, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.5169927909371782, |
|
"grad_norm": 0.05322265625, |
|
"learning_rate": 9.474469620635773e-06, |
|
"loss": 0.0918, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 0.5176793683487814, |
|
"grad_norm": 0.052734375, |
|
"learning_rate": 9.472919514913013e-06, |
|
"loss": 0.0888, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 0.5183659457603845, |
|
"grad_norm": 0.05322265625, |
|
"learning_rate": 9.471367268657121e-06, |
|
"loss": 0.097, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.5190525231719877, |
|
"grad_norm": 0.055908203125, |
|
"learning_rate": 9.469812882704413e-06, |
|
"loss": 0.0899, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.5197391005835909, |
|
"grad_norm": 0.0595703125, |
|
"learning_rate": 9.468256357892367e-06, |
|
"loss": 0.1025, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 0.5204256779951939, |
|
"grad_norm": 0.056640625, |
|
"learning_rate": 9.466697695059604e-06, |
|
"loss": 0.0955, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 0.5211122554067971, |
|
"grad_norm": 0.047119140625, |
|
"learning_rate": 9.465136895045907e-06, |
|
"loss": 0.0967, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 0.5217988328184002, |
|
"grad_norm": 0.0556640625, |
|
"learning_rate": 9.4635739586922e-06, |
|
"loss": 0.0967, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.5224854102300034, |
|
"grad_norm": 0.058837890625, |
|
"learning_rate": 9.462008886840567e-06, |
|
"loss": 0.0854, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 0.5231719876416066, |
|
"grad_norm": 0.052001953125, |
|
"learning_rate": 9.460441680334236e-06, |
|
"loss": 0.0929, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 0.5238585650532097, |
|
"grad_norm": 0.0546875, |
|
"learning_rate": 9.458872340017592e-06, |
|
"loss": 0.0887, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 0.5245451424648129, |
|
"grad_norm": 0.051513671875, |
|
"learning_rate": 9.45730086673616e-06, |
|
"loss": 0.1038, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.525231719876416, |
|
"grad_norm": 0.057373046875, |
|
"learning_rate": 9.455727261336626e-06, |
|
"loss": 0.0929, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.5259182972880192, |
|
"grad_norm": 0.060302734375, |
|
"learning_rate": 9.454151524666815e-06, |
|
"loss": 0.0988, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 0.5266048746996224, |
|
"grad_norm": 0.056396484375, |
|
"learning_rate": 9.452573657575705e-06, |
|
"loss": 0.0942, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 0.5272914521112255, |
|
"grad_norm": 0.05224609375, |
|
"learning_rate": 9.450993660913418e-06, |
|
"loss": 0.0837, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.5279780295228287, |
|
"grad_norm": 0.05712890625, |
|
"learning_rate": 9.449411535531227e-06, |
|
"loss": 0.0894, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 0.5286646069344318, |
|
"grad_norm": 0.056884765625, |
|
"learning_rate": 9.447827282281551e-06, |
|
"loss": 0.1023, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.529351184346035, |
|
"grad_norm": 0.04736328125, |
|
"learning_rate": 9.44624090201796e-06, |
|
"loss": 0.0952, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 0.5300377617576382, |
|
"grad_norm": 0.06396484375, |
|
"learning_rate": 9.444652395595159e-06, |
|
"loss": 0.1045, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 0.5307243391692413, |
|
"grad_norm": 0.05224609375, |
|
"learning_rate": 9.443061763869007e-06, |
|
"loss": 0.0942, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 0.5314109165808445, |
|
"grad_norm": 0.05322265625, |
|
"learning_rate": 9.44146900769651e-06, |
|
"loss": 0.0997, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.5320974939924477, |
|
"grad_norm": 0.054931640625, |
|
"learning_rate": 9.439874127935807e-06, |
|
"loss": 0.0953, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.5327840714040508, |
|
"grad_norm": 0.0498046875, |
|
"learning_rate": 9.438277125446194e-06, |
|
"loss": 0.0955, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 0.533470648815654, |
|
"grad_norm": 0.04736328125, |
|
"learning_rate": 9.436678001088106e-06, |
|
"loss": 0.0969, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 0.5341572262272571, |
|
"grad_norm": 0.053955078125, |
|
"learning_rate": 9.435076755723119e-06, |
|
"loss": 0.0874, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 0.5348438036388603, |
|
"grad_norm": 0.050537109375, |
|
"learning_rate": 9.43347339021395e-06, |
|
"loss": 0.0976, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 0.5355303810504635, |
|
"grad_norm": 0.052734375, |
|
"learning_rate": 9.431867905424466e-06, |
|
"loss": 0.0917, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.5362169584620666, |
|
"grad_norm": 0.05419921875, |
|
"learning_rate": 9.430260302219672e-06, |
|
"loss": 0.0985, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 0.5369035358736698, |
|
"grad_norm": 0.054931640625, |
|
"learning_rate": 9.428650581465713e-06, |
|
"loss": 0.0945, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 0.537590113285273, |
|
"grad_norm": 0.0546875, |
|
"learning_rate": 9.427038744029872e-06, |
|
"loss": 0.1026, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 0.5382766906968761, |
|
"grad_norm": 0.04931640625, |
|
"learning_rate": 9.425424790780581e-06, |
|
"loss": 0.0837, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.5389632681084793, |
|
"grad_norm": 0.04541015625, |
|
"learning_rate": 9.423808722587407e-06, |
|
"loss": 0.0862, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.5396498455200824, |
|
"grad_norm": 0.05224609375, |
|
"learning_rate": 9.422190540321055e-06, |
|
"loss": 0.0933, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 0.5403364229316856, |
|
"grad_norm": 0.06396484375, |
|
"learning_rate": 9.42057024485337e-06, |
|
"loss": 0.0911, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 0.5410230003432887, |
|
"grad_norm": 0.057373046875, |
|
"learning_rate": 9.418947837057338e-06, |
|
"loss": 0.0949, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 0.5417095777548918, |
|
"grad_norm": 0.056640625, |
|
"learning_rate": 9.41732331780708e-06, |
|
"loss": 0.0948, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 0.542396155166495, |
|
"grad_norm": 0.064453125, |
|
"learning_rate": 9.415696687977857e-06, |
|
"loss": 0.0874, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.5430827325780981, |
|
"grad_norm": 0.048583984375, |
|
"learning_rate": 9.414067948446064e-06, |
|
"loss": 0.0907, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 0.5437693099897013, |
|
"grad_norm": 0.05810546875, |
|
"learning_rate": 9.412437100089236e-06, |
|
"loss": 0.0937, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 0.5444558874013045, |
|
"grad_norm": 0.060546875, |
|
"learning_rate": 9.410804143786046e-06, |
|
"loss": 0.0928, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 0.5451424648129076, |
|
"grad_norm": 0.0537109375, |
|
"learning_rate": 9.409169080416296e-06, |
|
"loss": 0.0875, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 0.5458290422245108, |
|
"grad_norm": 0.05712890625, |
|
"learning_rate": 9.407531910860928e-06, |
|
"loss": 0.0894, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.546515619636114, |
|
"grad_norm": 0.053955078125, |
|
"learning_rate": 9.405892636002016e-06, |
|
"loss": 0.0935, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 0.5472021970477171, |
|
"grad_norm": 0.0537109375, |
|
"learning_rate": 9.404251256722772e-06, |
|
"loss": 0.0936, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 0.5478887744593203, |
|
"grad_norm": 0.052490234375, |
|
"learning_rate": 9.402607773907539e-06, |
|
"loss": 0.0898, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 0.5485753518709234, |
|
"grad_norm": 0.0498046875, |
|
"learning_rate": 9.400962188441795e-06, |
|
"loss": 0.098, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 0.5492619292825266, |
|
"grad_norm": 0.05859375, |
|
"learning_rate": 9.39931450121215e-06, |
|
"loss": 0.0848, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.5499485066941298, |
|
"grad_norm": 0.058349609375, |
|
"learning_rate": 9.397664713106345e-06, |
|
"loss": 0.1012, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 0.5506350841057329, |
|
"grad_norm": 0.052490234375, |
|
"learning_rate": 9.396012825013256e-06, |
|
"loss": 0.101, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 0.5513216615173361, |
|
"grad_norm": 0.049560546875, |
|
"learning_rate": 9.394358837822886e-06, |
|
"loss": 0.1025, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 0.5520082389289392, |
|
"grad_norm": 0.052490234375, |
|
"learning_rate": 9.392702752426377e-06, |
|
"loss": 0.0948, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 0.5526948163405424, |
|
"grad_norm": 0.053955078125, |
|
"learning_rate": 9.391044569715987e-06, |
|
"loss": 0.0985, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.5533813937521456, |
|
"grad_norm": 0.056884765625, |
|
"learning_rate": 9.389384290585123e-06, |
|
"loss": 0.1005, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 0.5540679711637487, |
|
"grad_norm": 0.052734375, |
|
"learning_rate": 9.387721915928309e-06, |
|
"loss": 0.087, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 0.5547545485753519, |
|
"grad_norm": 0.0576171875, |
|
"learning_rate": 9.386057446641195e-06, |
|
"loss": 0.0993, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 0.555441125986955, |
|
"grad_norm": 0.056396484375, |
|
"learning_rate": 9.384390883620573e-06, |
|
"loss": 0.0926, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 0.5561277033985582, |
|
"grad_norm": 0.057373046875, |
|
"learning_rate": 9.38272222776435e-06, |
|
"loss": 0.1032, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.5568142808101614, |
|
"grad_norm": 0.0625, |
|
"learning_rate": 9.381051479971569e-06, |
|
"loss": 0.0889, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 0.5575008582217645, |
|
"grad_norm": 0.052490234375, |
|
"learning_rate": 9.379378641142394e-06, |
|
"loss": 0.0935, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 0.5581874356333677, |
|
"grad_norm": 0.05224609375, |
|
"learning_rate": 9.377703712178122e-06, |
|
"loss": 0.1001, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 0.5588740130449709, |
|
"grad_norm": 0.056884765625, |
|
"learning_rate": 9.37602669398117e-06, |
|
"loss": 0.0928, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 0.559560590456574, |
|
"grad_norm": 0.060791015625, |
|
"learning_rate": 9.374347587455087e-06, |
|
"loss": 0.0922, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.5602471678681772, |
|
"grad_norm": 0.048828125, |
|
"learning_rate": 9.372666393504537e-06, |
|
"loss": 0.0906, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 0.5609337452797803, |
|
"grad_norm": 0.059814453125, |
|
"learning_rate": 9.370983113035323e-06, |
|
"loss": 0.1046, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 0.5616203226913834, |
|
"grad_norm": 0.05810546875, |
|
"learning_rate": 9.369297746954358e-06, |
|
"loss": 0.0881, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 0.5623069001029866, |
|
"grad_norm": 0.048095703125, |
|
"learning_rate": 9.367610296169689e-06, |
|
"loss": 0.086, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 0.5629934775145897, |
|
"grad_norm": 0.049072265625, |
|
"learning_rate": 9.365920761590478e-06, |
|
"loss": 0.105, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.5636800549261929, |
|
"grad_norm": 0.0556640625, |
|
"learning_rate": 9.36422914412702e-06, |
|
"loss": 0.0976, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 0.564366632337796, |
|
"grad_norm": 0.06298828125, |
|
"learning_rate": 9.362535444690721e-06, |
|
"loss": 0.0834, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 0.5650532097493992, |
|
"grad_norm": 0.050048828125, |
|
"learning_rate": 9.360839664194116e-06, |
|
"loss": 0.098, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 0.5657397871610024, |
|
"grad_norm": 0.050537109375, |
|
"learning_rate": 9.35914180355086e-06, |
|
"loss": 0.0978, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 0.5664263645726055, |
|
"grad_norm": 0.052734375, |
|
"learning_rate": 9.357441863675727e-06, |
|
"loss": 0.1036, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.5671129419842087, |
|
"grad_norm": 0.05810546875, |
|
"learning_rate": 9.355739845484611e-06, |
|
"loss": 0.0887, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 0.5677995193958119, |
|
"grad_norm": 0.054931640625, |
|
"learning_rate": 9.354035749894527e-06, |
|
"loss": 0.0994, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 0.568486096807415, |
|
"grad_norm": 0.049560546875, |
|
"learning_rate": 9.352329577823613e-06, |
|
"loss": 0.0922, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 0.5691726742190182, |
|
"grad_norm": 0.052978515625, |
|
"learning_rate": 9.350621330191116e-06, |
|
"loss": 0.0962, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 0.5698592516306213, |
|
"grad_norm": 0.051025390625, |
|
"learning_rate": 9.348911007917411e-06, |
|
"loss": 0.0928, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.5705458290422245, |
|
"grad_norm": 0.051025390625, |
|
"learning_rate": 9.347198611923986e-06, |
|
"loss": 0.0984, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 0.5712324064538277, |
|
"grad_norm": 0.0576171875, |
|
"learning_rate": 9.345484143133447e-06, |
|
"loss": 0.0835, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.5719189838654308, |
|
"grad_norm": 0.052001953125, |
|
"learning_rate": 9.343767602469519e-06, |
|
"loss": 0.0918, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 0.572605561277034, |
|
"grad_norm": 0.053466796875, |
|
"learning_rate": 9.342048990857037e-06, |
|
"loss": 0.0963, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 0.5732921386886372, |
|
"grad_norm": 0.050048828125, |
|
"learning_rate": 9.340328309221962e-06, |
|
"loss": 0.0853, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.5739787161002403, |
|
"grad_norm": 0.047119140625, |
|
"learning_rate": 9.33860555849136e-06, |
|
"loss": 0.0893, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 0.5746652935118435, |
|
"grad_norm": 0.05419921875, |
|
"learning_rate": 9.336880739593415e-06, |
|
"loss": 0.1013, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 0.5753518709234466, |
|
"grad_norm": 0.06103515625, |
|
"learning_rate": 9.335153853457431e-06, |
|
"loss": 0.0981, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 0.5760384483350498, |
|
"grad_norm": 0.046875, |
|
"learning_rate": 9.333424901013818e-06, |
|
"loss": 0.0936, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 0.576725025746653, |
|
"grad_norm": 0.0498046875, |
|
"learning_rate": 9.331693883194105e-06, |
|
"loss": 0.0915, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.5774116031582561, |
|
"grad_norm": 0.046142578125, |
|
"learning_rate": 9.329960800930929e-06, |
|
"loss": 0.0912, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 0.5780981805698593, |
|
"grad_norm": 0.05419921875, |
|
"learning_rate": 9.328225655158045e-06, |
|
"loss": 0.094, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 0.5787847579814624, |
|
"grad_norm": 0.055419921875, |
|
"learning_rate": 9.32648844681031e-06, |
|
"loss": 0.0873, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 0.5794713353930656, |
|
"grad_norm": 0.047119140625, |
|
"learning_rate": 9.324749176823704e-06, |
|
"loss": 0.0872, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 0.5801579128046688, |
|
"grad_norm": 0.05810546875, |
|
"learning_rate": 9.323007846135312e-06, |
|
"loss": 0.1131, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.5808444902162719, |
|
"grad_norm": 0.05615234375, |
|
"learning_rate": 9.321264455683327e-06, |
|
"loss": 0.1014, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 0.5815310676278751, |
|
"grad_norm": 0.052734375, |
|
"learning_rate": 9.31951900640706e-06, |
|
"loss": 0.0864, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 0.5822176450394781, |
|
"grad_norm": 0.055419921875, |
|
"learning_rate": 9.317771499246918e-06, |
|
"loss": 0.0848, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 0.5829042224510813, |
|
"grad_norm": 0.05322265625, |
|
"learning_rate": 9.316021935144431e-06, |
|
"loss": 0.091, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 0.5835907998626845, |
|
"grad_norm": 0.05126953125, |
|
"learning_rate": 9.314270315042225e-06, |
|
"loss": 0.1072, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.5842773772742876, |
|
"grad_norm": 0.050048828125, |
|
"learning_rate": 9.312516639884047e-06, |
|
"loss": 0.0895, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 0.5849639546858908, |
|
"grad_norm": 0.0634765625, |
|
"learning_rate": 9.310760910614736e-06, |
|
"loss": 0.0877, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 0.585650532097494, |
|
"grad_norm": 0.058837890625, |
|
"learning_rate": 9.309003128180249e-06, |
|
"loss": 0.092, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 0.5863371095090971, |
|
"grad_norm": 0.060302734375, |
|
"learning_rate": 9.307243293527645e-06, |
|
"loss": 0.1001, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 0.5870236869207003, |
|
"grad_norm": 0.056884765625, |
|
"learning_rate": 9.30548140760509e-06, |
|
"loss": 0.091, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.5877102643323034, |
|
"grad_norm": 0.057373046875, |
|
"learning_rate": 9.303717471361855e-06, |
|
"loss": 0.0973, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 0.5883968417439066, |
|
"grad_norm": 0.0625, |
|
"learning_rate": 9.301951485748314e-06, |
|
"loss": 0.0982, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 0.5890834191555098, |
|
"grad_norm": 0.052734375, |
|
"learning_rate": 9.300183451715945e-06, |
|
"loss": 0.0876, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 0.5897699965671129, |
|
"grad_norm": 0.05322265625, |
|
"learning_rate": 9.298413370217333e-06, |
|
"loss": 0.0829, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 0.5904565739787161, |
|
"grad_norm": 0.0537109375, |
|
"learning_rate": 9.296641242206165e-06, |
|
"loss": 0.0866, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.5911431513903193, |
|
"grad_norm": 0.048095703125, |
|
"learning_rate": 9.294867068637227e-06, |
|
"loss": 0.0947, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 0.5918297288019224, |
|
"grad_norm": 0.049560546875, |
|
"learning_rate": 9.29309085046641e-06, |
|
"loss": 0.102, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 0.5925163062135256, |
|
"grad_norm": 0.052490234375, |
|
"learning_rate": 9.29131258865071e-06, |
|
"loss": 0.0918, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 0.5932028836251287, |
|
"grad_norm": 0.053466796875, |
|
"learning_rate": 9.289532284148218e-06, |
|
"loss": 0.0869, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 0.5938894610367319, |
|
"grad_norm": 0.06396484375, |
|
"learning_rate": 9.287749937918125e-06, |
|
"loss": 0.0993, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.5945760384483351, |
|
"grad_norm": 0.05908203125, |
|
"learning_rate": 9.285965550920732e-06, |
|
"loss": 0.1037, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 0.5952626158599382, |
|
"grad_norm": 0.052978515625, |
|
"learning_rate": 9.284179124117426e-06, |
|
"loss": 0.0958, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 0.5959491932715414, |
|
"grad_norm": 0.0595703125, |
|
"learning_rate": 9.282390658470703e-06, |
|
"loss": 0.1054, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 0.5966357706831446, |
|
"grad_norm": 0.056884765625, |
|
"learning_rate": 9.280600154944153e-06, |
|
"loss": 0.0934, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 0.5973223480947477, |
|
"grad_norm": 0.05810546875, |
|
"learning_rate": 9.278807614502467e-06, |
|
"loss": 0.0926, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.5980089255063509, |
|
"grad_norm": 0.05517578125, |
|
"learning_rate": 9.27701303811143e-06, |
|
"loss": 0.0886, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 0.598695502917954, |
|
"grad_norm": 0.0537109375, |
|
"learning_rate": 9.275216426737924e-06, |
|
"loss": 0.0868, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 0.5993820803295572, |
|
"grad_norm": 0.05419921875, |
|
"learning_rate": 9.273417781349933e-06, |
|
"loss": 0.0865, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 0.6000686577411604, |
|
"grad_norm": 0.055908203125, |
|
"learning_rate": 9.271617102916528e-06, |
|
"loss": 0.0964, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 0.6007552351527635, |
|
"grad_norm": 0.056640625, |
|
"learning_rate": 9.269814392407883e-06, |
|
"loss": 0.0864, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.6014418125643667, |
|
"grad_norm": 0.061279296875, |
|
"learning_rate": 9.268009650795264e-06, |
|
"loss": 0.1008, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 0.6021283899759697, |
|
"grad_norm": 0.05029296875, |
|
"learning_rate": 9.26620287905103e-06, |
|
"loss": 0.0897, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 0.6028149673875729, |
|
"grad_norm": 0.050537109375, |
|
"learning_rate": 9.264394078148636e-06, |
|
"loss": 0.0867, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 0.6035015447991761, |
|
"grad_norm": 0.05712890625, |
|
"learning_rate": 9.26258324906263e-06, |
|
"loss": 0.086, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 0.6041881222107792, |
|
"grad_norm": 0.0478515625, |
|
"learning_rate": 9.260770392768652e-06, |
|
"loss": 0.0777, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.6048746996223824, |
|
"grad_norm": 0.056884765625, |
|
"learning_rate": 9.258955510243431e-06, |
|
"loss": 0.0914, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 0.6055612770339855, |
|
"grad_norm": 0.05322265625, |
|
"learning_rate": 9.257138602464795e-06, |
|
"loss": 0.0919, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 0.6062478544455887, |
|
"grad_norm": 0.05322265625, |
|
"learning_rate": 9.255319670411658e-06, |
|
"loss": 0.0873, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 0.6069344318571919, |
|
"grad_norm": 0.0576171875, |
|
"learning_rate": 9.253498715064025e-06, |
|
"loss": 0.0908, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 0.607621009268795, |
|
"grad_norm": 0.05322265625, |
|
"learning_rate": 9.251675737402992e-06, |
|
"loss": 0.0957, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.6083075866803982, |
|
"grad_norm": 0.055908203125, |
|
"learning_rate": 9.249850738410749e-06, |
|
"loss": 0.0968, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 0.6089941640920014, |
|
"grad_norm": 0.051025390625, |
|
"learning_rate": 9.248023719070563e-06, |
|
"loss": 0.091, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 0.6096807415036045, |
|
"grad_norm": 0.06591796875, |
|
"learning_rate": 9.246194680366802e-06, |
|
"loss": 0.0961, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 0.6103673189152077, |
|
"grad_norm": 0.04638671875, |
|
"learning_rate": 9.244363623284916e-06, |
|
"loss": 0.0899, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 0.6110538963268108, |
|
"grad_norm": 0.0576171875, |
|
"learning_rate": 9.242530548811444e-06, |
|
"loss": 0.0935, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.611740473738414, |
|
"grad_norm": 0.06298828125, |
|
"learning_rate": 9.240695457934012e-06, |
|
"loss": 0.1028, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 0.6124270511500172, |
|
"grad_norm": 0.055908203125, |
|
"learning_rate": 9.23885835164133e-06, |
|
"loss": 0.0843, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 0.6131136285616203, |
|
"grad_norm": 0.0537109375, |
|
"learning_rate": 9.237019230923196e-06, |
|
"loss": 0.0933, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 0.6138002059732235, |
|
"grad_norm": 0.052734375, |
|
"learning_rate": 9.235178096770494e-06, |
|
"loss": 0.101, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 0.6144867833848267, |
|
"grad_norm": 0.051025390625, |
|
"learning_rate": 9.233334950175194e-06, |
|
"loss": 0.0922, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.6151733607964298, |
|
"grad_norm": 0.05517578125, |
|
"learning_rate": 9.231489792130343e-06, |
|
"loss": 0.0838, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 0.615859938208033, |
|
"grad_norm": 0.06494140625, |
|
"learning_rate": 9.229642623630081e-06, |
|
"loss": 0.0853, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 0.6165465156196361, |
|
"grad_norm": 0.0595703125, |
|
"learning_rate": 9.227793445669627e-06, |
|
"loss": 0.0868, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 0.6172330930312393, |
|
"grad_norm": 0.056884765625, |
|
"learning_rate": 9.225942259245281e-06, |
|
"loss": 0.0944, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 0.6179196704428425, |
|
"grad_norm": 0.068359375, |
|
"learning_rate": 9.224089065354428e-06, |
|
"loss": 0.1033, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.6186062478544456, |
|
"grad_norm": 0.05615234375, |
|
"learning_rate": 9.222233864995533e-06, |
|
"loss": 0.0969, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 0.6192928252660488, |
|
"grad_norm": 0.06103515625, |
|
"learning_rate": 9.220376659168141e-06, |
|
"loss": 0.105, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 0.619979402677652, |
|
"grad_norm": 0.061767578125, |
|
"learning_rate": 9.21851744887288e-06, |
|
"loss": 0.095, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 0.6206659800892551, |
|
"grad_norm": 0.0546875, |
|
"learning_rate": 9.216656235111463e-06, |
|
"loss": 0.0905, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 0.6213525575008583, |
|
"grad_norm": 0.06298828125, |
|
"learning_rate": 9.214793018886666e-06, |
|
"loss": 0.095, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.6220391349124614, |
|
"grad_norm": 0.050537109375, |
|
"learning_rate": 9.212927801202361e-06, |
|
"loss": 0.0948, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 0.6227257123240645, |
|
"grad_norm": 0.049072265625, |
|
"learning_rate": 9.211060583063489e-06, |
|
"loss": 0.0764, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 0.6234122897356676, |
|
"grad_norm": 0.056884765625, |
|
"learning_rate": 9.209191365476074e-06, |
|
"loss": 0.1005, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 0.6240988671472708, |
|
"grad_norm": 0.049560546875, |
|
"learning_rate": 9.207320149447212e-06, |
|
"loss": 0.0921, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 0.624785444558874, |
|
"grad_norm": 0.0517578125, |
|
"learning_rate": 9.20544693598508e-06, |
|
"loss": 0.0889, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.6254720219704771, |
|
"grad_norm": 0.060791015625, |
|
"learning_rate": 9.20357172609893e-06, |
|
"loss": 0.0899, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 0.6261585993820803, |
|
"grad_norm": 0.053955078125, |
|
"learning_rate": 9.201694520799086e-06, |
|
"loss": 0.0969, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 0.6268451767936835, |
|
"grad_norm": 0.05615234375, |
|
"learning_rate": 9.199815321096953e-06, |
|
"loss": 0.0914, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 0.6275317542052866, |
|
"grad_norm": 0.06201171875, |
|
"learning_rate": 9.19793412800501e-06, |
|
"loss": 0.0913, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 0.6282183316168898, |
|
"grad_norm": 0.05908203125, |
|
"learning_rate": 9.196050942536806e-06, |
|
"loss": 0.0856, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.6289049090284929, |
|
"grad_norm": 0.050048828125, |
|
"learning_rate": 9.194165765706963e-06, |
|
"loss": 0.0999, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 0.6295914864400961, |
|
"grad_norm": 0.0595703125, |
|
"learning_rate": 9.192278598531182e-06, |
|
"loss": 0.0952, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 0.6302780638516993, |
|
"grad_norm": 0.06298828125, |
|
"learning_rate": 9.19038944202623e-06, |
|
"loss": 0.0897, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 0.6309646412633024, |
|
"grad_norm": 0.054931640625, |
|
"learning_rate": 9.18849829720995e-06, |
|
"loss": 0.0901, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 0.6316512186749056, |
|
"grad_norm": 0.056884765625, |
|
"learning_rate": 9.186605165101253e-06, |
|
"loss": 0.1012, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.6323377960865088, |
|
"grad_norm": 0.055908203125, |
|
"learning_rate": 9.184710046720123e-06, |
|
"loss": 0.0927, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 0.6330243734981119, |
|
"grad_norm": 0.05126953125, |
|
"learning_rate": 9.182812943087614e-06, |
|
"loss": 0.0838, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 0.6337109509097151, |
|
"grad_norm": 0.053955078125, |
|
"learning_rate": 9.18091385522585e-06, |
|
"loss": 0.0855, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 0.6343975283213182, |
|
"grad_norm": 0.049072265625, |
|
"learning_rate": 9.179012784158023e-06, |
|
"loss": 0.0889, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 0.6350841057329214, |
|
"grad_norm": 0.0556640625, |
|
"learning_rate": 9.177109730908393e-06, |
|
"loss": 0.0857, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.6357706831445246, |
|
"grad_norm": 0.047607421875, |
|
"learning_rate": 9.175204696502288e-06, |
|
"loss": 0.103, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 0.6364572605561277, |
|
"grad_norm": 0.054931640625, |
|
"learning_rate": 9.173297681966105e-06, |
|
"loss": 0.0952, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 0.6371438379677309, |
|
"grad_norm": 0.05126953125, |
|
"learning_rate": 9.171388688327307e-06, |
|
"loss": 0.0926, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 0.637830415379334, |
|
"grad_norm": 0.06396484375, |
|
"learning_rate": 9.169477716614425e-06, |
|
"loss": 0.0887, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 0.6385169927909372, |
|
"grad_norm": 0.05126953125, |
|
"learning_rate": 9.167564767857052e-06, |
|
"loss": 0.0798, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.6392035702025404, |
|
"grad_norm": 0.05322265625, |
|
"learning_rate": 9.165649843085848e-06, |
|
"loss": 0.0948, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 0.6398901476141435, |
|
"grad_norm": 0.050048828125, |
|
"learning_rate": 9.163732943332539e-06, |
|
"loss": 0.0893, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 0.6405767250257467, |
|
"grad_norm": 0.06494140625, |
|
"learning_rate": 9.161814069629914e-06, |
|
"loss": 0.0857, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 0.6412633024373499, |
|
"grad_norm": 0.06494140625, |
|
"learning_rate": 9.159893223011824e-06, |
|
"loss": 0.0977, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 0.641949879848953, |
|
"grad_norm": 0.052734375, |
|
"learning_rate": 9.157970404513185e-06, |
|
"loss": 0.0938, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.6426364572605562, |
|
"grad_norm": 0.059326171875, |
|
"learning_rate": 9.156045615169978e-06, |
|
"loss": 0.0931, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 0.6433230346721592, |
|
"grad_norm": 0.06298828125, |
|
"learning_rate": 9.154118856019239e-06, |
|
"loss": 0.0912, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 0.6440096120837624, |
|
"grad_norm": 0.054443359375, |
|
"learning_rate": 9.15219012809907e-06, |
|
"loss": 0.0838, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 0.6446961894953656, |
|
"grad_norm": 0.0654296875, |
|
"learning_rate": 9.150259432448632e-06, |
|
"loss": 0.0969, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 0.6453827669069687, |
|
"grad_norm": 0.0556640625, |
|
"learning_rate": 9.148326770108147e-06, |
|
"loss": 0.0904, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.6460693443185719, |
|
"grad_norm": 0.05712890625, |
|
"learning_rate": 9.146392142118899e-06, |
|
"loss": 0.092, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 0.646755921730175, |
|
"grad_norm": 0.0654296875, |
|
"learning_rate": 9.144455549523227e-06, |
|
"loss": 0.0849, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 0.6474424991417782, |
|
"grad_norm": 0.053466796875, |
|
"learning_rate": 9.14251699336453e-06, |
|
"loss": 0.0963, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 0.6481290765533814, |
|
"grad_norm": 0.0537109375, |
|
"learning_rate": 9.140576474687263e-06, |
|
"loss": 0.087, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 0.6488156539649845, |
|
"grad_norm": 0.052734375, |
|
"learning_rate": 9.138633994536945e-06, |
|
"loss": 0.0896, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.6495022313765877, |
|
"grad_norm": 0.051513671875, |
|
"learning_rate": 9.136689553960144e-06, |
|
"loss": 0.0901, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 0.6501888087881909, |
|
"grad_norm": 0.058837890625, |
|
"learning_rate": 9.134743154004488e-06, |
|
"loss": 0.0904, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 0.650875386199794, |
|
"grad_norm": 0.06396484375, |
|
"learning_rate": 9.132794795718662e-06, |
|
"loss": 0.0975, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 0.6515619636113972, |
|
"grad_norm": 0.06298828125, |
|
"learning_rate": 9.1308444801524e-06, |
|
"loss": 0.1025, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 0.6522485410230003, |
|
"grad_norm": 0.055419921875, |
|
"learning_rate": 9.128892208356496e-06, |
|
"loss": 0.0894, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.6529351184346035, |
|
"grad_norm": 0.0498046875, |
|
"learning_rate": 9.126937981382802e-06, |
|
"loss": 0.0826, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 0.6536216958462067, |
|
"grad_norm": 0.04931640625, |
|
"learning_rate": 9.12498180028421e-06, |
|
"loss": 0.092, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 0.6543082732578098, |
|
"grad_norm": 0.056396484375, |
|
"learning_rate": 9.12302366611468e-06, |
|
"loss": 0.0983, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 0.654994850669413, |
|
"grad_norm": 0.0517578125, |
|
"learning_rate": 9.121063579929214e-06, |
|
"loss": 0.0926, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 0.6556814280810161, |
|
"grad_norm": 0.054443359375, |
|
"learning_rate": 9.119101542783868e-06, |
|
"loss": 0.0969, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.6563680054926193, |
|
"grad_norm": 0.05419921875, |
|
"learning_rate": 9.117137555735753e-06, |
|
"loss": 0.0981, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 0.6570545829042225, |
|
"grad_norm": 0.05126953125, |
|
"learning_rate": 9.115171619843025e-06, |
|
"loss": 0.0978, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 0.6577411603158256, |
|
"grad_norm": 0.05859375, |
|
"learning_rate": 9.113203736164894e-06, |
|
"loss": 0.0913, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 0.6584277377274288, |
|
"grad_norm": 0.05810546875, |
|
"learning_rate": 9.111233905761618e-06, |
|
"loss": 0.0937, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 0.659114315139032, |
|
"grad_norm": 0.062255859375, |
|
"learning_rate": 9.109262129694506e-06, |
|
"loss": 0.0891, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.6598008925506351, |
|
"grad_norm": 0.05712890625, |
|
"learning_rate": 9.107288409025909e-06, |
|
"loss": 0.1115, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 0.6604874699622383, |
|
"grad_norm": 0.059814453125, |
|
"learning_rate": 9.105312744819232e-06, |
|
"loss": 0.0859, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 0.6611740473738414, |
|
"grad_norm": 0.052001953125, |
|
"learning_rate": 9.103335138138926e-06, |
|
"loss": 0.1066, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 0.6618606247854446, |
|
"grad_norm": 0.05126953125, |
|
"learning_rate": 9.101355590050489e-06, |
|
"loss": 0.092, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 0.6625472021970478, |
|
"grad_norm": 0.05615234375, |
|
"learning_rate": 9.09937410162046e-06, |
|
"loss": 0.0964, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.6632337796086509, |
|
"grad_norm": 0.056640625, |
|
"learning_rate": 9.09739067391643e-06, |
|
"loss": 0.093, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 0.663920357020254, |
|
"grad_norm": 0.052978515625, |
|
"learning_rate": 9.095405308007029e-06, |
|
"loss": 0.0849, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 0.6646069344318571, |
|
"grad_norm": 0.056640625, |
|
"learning_rate": 9.093418004961939e-06, |
|
"loss": 0.0884, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 0.6652935118434603, |
|
"grad_norm": 0.05224609375, |
|
"learning_rate": 9.091428765851877e-06, |
|
"loss": 0.0883, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 0.6659800892550635, |
|
"grad_norm": 0.056640625, |
|
"learning_rate": 9.089437591748607e-06, |
|
"loss": 0.09, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.6666666666666666, |
|
"grad_norm": 0.053466796875, |
|
"learning_rate": 9.087444483724937e-06, |
|
"loss": 0.0873, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 0.6673532440782698, |
|
"grad_norm": 0.053466796875, |
|
"learning_rate": 9.085449442854715e-06, |
|
"loss": 0.0878, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 0.668039821489873, |
|
"grad_norm": 0.049560546875, |
|
"learning_rate": 9.08345247021283e-06, |
|
"loss": 0.0842, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 0.6687263989014761, |
|
"grad_norm": 0.0498046875, |
|
"learning_rate": 9.081453566875215e-06, |
|
"loss": 0.1038, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 0.6694129763130793, |
|
"grad_norm": 0.06103515625, |
|
"learning_rate": 9.079452733918841e-06, |
|
"loss": 0.0974, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.6700995537246824, |
|
"grad_norm": 0.05615234375, |
|
"learning_rate": 9.077449972421716e-06, |
|
"loss": 0.0938, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 0.6707861311362856, |
|
"grad_norm": 0.05615234375, |
|
"learning_rate": 9.07544528346289e-06, |
|
"loss": 0.1028, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 0.6714727085478888, |
|
"grad_norm": 0.052001953125, |
|
"learning_rate": 9.073438668122454e-06, |
|
"loss": 0.0789, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 0.6721592859594919, |
|
"grad_norm": 0.049072265625, |
|
"learning_rate": 9.07143012748153e-06, |
|
"loss": 0.0985, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 0.6728458633710951, |
|
"grad_norm": 0.056396484375, |
|
"learning_rate": 9.069419662622284e-06, |
|
"loss": 0.096, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.6735324407826982, |
|
"grad_norm": 0.06591796875, |
|
"learning_rate": 9.067407274627917e-06, |
|
"loss": 0.0943, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 0.6742190181943014, |
|
"grad_norm": 0.06640625, |
|
"learning_rate": 9.06539296458266e-06, |
|
"loss": 0.0937, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 0.6749055956059046, |
|
"grad_norm": 0.052490234375, |
|
"learning_rate": 9.063376733571791e-06, |
|
"loss": 0.091, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 0.6755921730175077, |
|
"grad_norm": 0.056396484375, |
|
"learning_rate": 9.061358582681614e-06, |
|
"loss": 0.092, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 0.6762787504291109, |
|
"grad_norm": 0.05712890625, |
|
"learning_rate": 9.05933851299947e-06, |
|
"loss": 0.0982, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.6769653278407141, |
|
"grad_norm": 0.0634765625, |
|
"learning_rate": 9.057316525613735e-06, |
|
"loss": 0.0859, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 0.6776519052523172, |
|
"grad_norm": 0.060546875, |
|
"learning_rate": 9.055292621613815e-06, |
|
"loss": 0.1053, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 0.6783384826639204, |
|
"grad_norm": 0.05419921875, |
|
"learning_rate": 9.053266802090152e-06, |
|
"loss": 0.0977, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 0.6790250600755235, |
|
"grad_norm": 0.052978515625, |
|
"learning_rate": 9.051239068134221e-06, |
|
"loss": 0.0913, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 0.6797116374871267, |
|
"grad_norm": 0.06005859375, |
|
"learning_rate": 9.049209420838522e-06, |
|
"loss": 0.0874, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.6803982148987299, |
|
"grad_norm": 0.056640625, |
|
"learning_rate": 9.047177861296595e-06, |
|
"loss": 0.0895, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 0.681084792310333, |
|
"grad_norm": 0.057373046875, |
|
"learning_rate": 9.045144390603e-06, |
|
"loss": 0.0886, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 0.6817713697219362, |
|
"grad_norm": 0.054931640625, |
|
"learning_rate": 9.043109009853337e-06, |
|
"loss": 0.0862, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 0.6824579471335394, |
|
"grad_norm": 0.047119140625, |
|
"learning_rate": 9.041071720144232e-06, |
|
"loss": 0.0853, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 0.6831445245451425, |
|
"grad_norm": 0.060546875, |
|
"learning_rate": 9.039032522573328e-06, |
|
"loss": 0.1006, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.6838311019567456, |
|
"grad_norm": 0.0712890625, |
|
"learning_rate": 9.036991418239316e-06, |
|
"loss": 0.0803, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 0.6845176793683487, |
|
"grad_norm": 0.0576171875, |
|
"learning_rate": 9.034948408241898e-06, |
|
"loss": 0.0875, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 0.6852042567799519, |
|
"grad_norm": 0.06689453125, |
|
"learning_rate": 9.032903493681811e-06, |
|
"loss": 0.085, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 0.685890834191555, |
|
"grad_norm": 0.05615234375, |
|
"learning_rate": 9.030856675660816e-06, |
|
"loss": 0.0925, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 0.6865774116031582, |
|
"grad_norm": 0.053466796875, |
|
"learning_rate": 9.028807955281701e-06, |
|
"loss": 0.0915, |
|
"step": 1000 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 4368, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.08857892274176e+18, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|