{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 0,
  "global_step": 365,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
"log_history": [ |
|
{ |
|
"epoch": 0.0027397260273972603, |
|
"grad_norm": 1.150786280632019, |
|
"learning_rate": 1e-05, |
|
"loss": 1.8496, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.005479452054794521, |
|
"grad_norm": 1.0707762241363525, |
|
"learning_rate": 9.972602739726028e-06, |
|
"loss": 1.8196, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.00821917808219178, |
|
"grad_norm": 0.9879312515258789, |
|
"learning_rate": 9.945205479452056e-06, |
|
"loss": 1.8851, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.010958904109589041, |
|
"grad_norm": 0.8968678712844849, |
|
"learning_rate": 9.917808219178083e-06, |
|
"loss": 1.8575, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0136986301369863, |
|
"grad_norm": 0.8026939034461975, |
|
"learning_rate": 9.89041095890411e-06, |
|
"loss": 1.7822, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.01643835616438356, |
|
"grad_norm": 0.6270458102226257, |
|
"learning_rate": 9.863013698630138e-06, |
|
"loss": 1.7336, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.019178082191780823, |
|
"grad_norm": 0.5464762449264526, |
|
"learning_rate": 9.835616438356166e-06, |
|
"loss": 1.723, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.021917808219178082, |
|
"grad_norm": 0.5061464905738831, |
|
"learning_rate": 9.808219178082193e-06, |
|
"loss": 1.7244, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.024657534246575342, |
|
"grad_norm": 0.4670104384422302, |
|
"learning_rate": 9.78082191780822e-06, |
|
"loss": 1.7493, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0273972602739726, |
|
"grad_norm": 0.45351746678352356, |
|
"learning_rate": 9.753424657534248e-06, |
|
"loss": 1.7181, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.030136986301369864, |
|
"grad_norm": 0.4705427885055542, |
|
"learning_rate": 9.726027397260275e-06, |
|
"loss": 1.7347, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.03287671232876712, |
|
"grad_norm": 0.4133734405040741, |
|
"learning_rate": 9.698630136986303e-06, |
|
"loss": 1.6304, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.03561643835616438, |
|
"grad_norm": 0.41555055975914, |
|
"learning_rate": 9.67123287671233e-06, |
|
"loss": 1.5775, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.038356164383561646, |
|
"grad_norm": 0.4109717607498169, |
|
"learning_rate": 9.643835616438358e-06, |
|
"loss": 1.6275, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.0410958904109589, |
|
"grad_norm": 0.4048368036746979, |
|
"learning_rate": 9.616438356164385e-06, |
|
"loss": 1.5652, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.043835616438356165, |
|
"grad_norm": 0.3852587044239044, |
|
"learning_rate": 9.589041095890411e-06, |
|
"loss": 1.5212, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.04657534246575343, |
|
"grad_norm": 0.3782636821269989, |
|
"learning_rate": 9.561643835616438e-06, |
|
"loss": 1.4995, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.049315068493150684, |
|
"grad_norm": 0.4001636207103729, |
|
"learning_rate": 9.534246575342466e-06, |
|
"loss": 1.5035, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.052054794520547946, |
|
"grad_norm": 0.3587205410003662, |
|
"learning_rate": 9.506849315068493e-06, |
|
"loss": 1.4884, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.0547945205479452, |
|
"grad_norm": 0.4003235101699829, |
|
"learning_rate": 9.47945205479452e-06, |
|
"loss": 1.5282, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.057534246575342465, |
|
"grad_norm": 0.3399674892425537, |
|
"learning_rate": 9.452054794520548e-06, |
|
"loss": 1.4062, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.06027397260273973, |
|
"grad_norm": 0.37624427676200867, |
|
"learning_rate": 9.424657534246576e-06, |
|
"loss": 1.4152, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.06301369863013699, |
|
"grad_norm": 0.35346779227256775, |
|
"learning_rate": 9.397260273972603e-06, |
|
"loss": 1.4174, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.06575342465753424, |
|
"grad_norm": 0.33159714937210083, |
|
"learning_rate": 9.36986301369863e-06, |
|
"loss": 1.3408, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.0684931506849315, |
|
"grad_norm": 0.3265334963798523, |
|
"learning_rate": 9.342465753424658e-06, |
|
"loss": 1.3865, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.07123287671232877, |
|
"grad_norm": 0.317669540643692, |
|
"learning_rate": 9.315068493150685e-06, |
|
"loss": 1.3516, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.07397260273972603, |
|
"grad_norm": 0.3345702886581421, |
|
"learning_rate": 9.287671232876713e-06, |
|
"loss": 1.2656, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.07671232876712329, |
|
"grad_norm": 0.3096367418766022, |
|
"learning_rate": 9.26027397260274e-06, |
|
"loss": 1.4123, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.07945205479452055, |
|
"grad_norm": 0.3097746968269348, |
|
"learning_rate": 9.232876712328768e-06, |
|
"loss": 1.3655, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.0821917808219178, |
|
"grad_norm": 0.3306300938129425, |
|
"learning_rate": 9.205479452054795e-06, |
|
"loss": 1.3966, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.08493150684931507, |
|
"grad_norm": 0.26500970125198364, |
|
"learning_rate": 9.178082191780823e-06, |
|
"loss": 1.2702, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.08767123287671233, |
|
"grad_norm": 0.31576573848724365, |
|
"learning_rate": 9.15068493150685e-06, |
|
"loss": 1.3126, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.09041095890410959, |
|
"grad_norm": 0.2620871365070343, |
|
"learning_rate": 9.123287671232878e-06, |
|
"loss": 1.3494, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.09315068493150686, |
|
"grad_norm": 0.30692246556282043, |
|
"learning_rate": 9.095890410958905e-06, |
|
"loss": 1.3417, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.0958904109589041, |
|
"grad_norm": 0.34853464365005493, |
|
"learning_rate": 9.068493150684932e-06, |
|
"loss": 1.2773, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.09863013698630137, |
|
"grad_norm": 0.26933398842811584, |
|
"learning_rate": 9.04109589041096e-06, |
|
"loss": 1.2975, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.10136986301369863, |
|
"grad_norm": 0.24809734523296356, |
|
"learning_rate": 9.013698630136987e-06, |
|
"loss": 1.2504, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.10410958904109589, |
|
"grad_norm": 0.28276240825653076, |
|
"learning_rate": 8.986301369863015e-06, |
|
"loss": 1.3104, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.10684931506849316, |
|
"grad_norm": 0.22766578197479248, |
|
"learning_rate": 8.958904109589042e-06, |
|
"loss": 1.2021, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.1095890410958904, |
|
"grad_norm": 0.24498251080513, |
|
"learning_rate": 8.93150684931507e-06, |
|
"loss": 1.205, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.11232876712328767, |
|
"grad_norm": 0.2171860635280609, |
|
"learning_rate": 8.904109589041097e-06, |
|
"loss": 1.2178, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.11506849315068493, |
|
"grad_norm": 0.2212899923324585, |
|
"learning_rate": 8.876712328767125e-06, |
|
"loss": 1.2356, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.1178082191780822, |
|
"grad_norm": 0.288429319858551, |
|
"learning_rate": 8.849315068493152e-06, |
|
"loss": 1.217, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.12054794520547946, |
|
"grad_norm": 0.22750040888786316, |
|
"learning_rate": 8.82191780821918e-06, |
|
"loss": 1.2124, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.1232876712328767, |
|
"grad_norm": 0.24312719702720642, |
|
"learning_rate": 8.794520547945207e-06, |
|
"loss": 1.2307, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.12602739726027398, |
|
"grad_norm": 0.21915780007839203, |
|
"learning_rate": 8.767123287671233e-06, |
|
"loss": 1.2228, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.12876712328767123, |
|
"grad_norm": 0.24465875327587128, |
|
"learning_rate": 8.73972602739726e-06, |
|
"loss": 1.2037, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.13150684931506848, |
|
"grad_norm": 0.2381676435470581, |
|
"learning_rate": 8.712328767123288e-06, |
|
"loss": 1.2224, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.13424657534246576, |
|
"grad_norm": 0.2046230137348175, |
|
"learning_rate": 8.684931506849315e-06, |
|
"loss": 1.1908, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.136986301369863, |
|
"grad_norm": 0.30490127205848694, |
|
"learning_rate": 8.657534246575343e-06, |
|
"loss": 1.1814, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.13972602739726028, |
|
"grad_norm": 0.28195858001708984, |
|
"learning_rate": 8.63013698630137e-06, |
|
"loss": 1.2451, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.14246575342465753, |
|
"grad_norm": 0.2129937708377838, |
|
"learning_rate": 8.602739726027397e-06, |
|
"loss": 1.1621, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.14520547945205478, |
|
"grad_norm": 0.23926354944705963, |
|
"learning_rate": 8.575342465753425e-06, |
|
"loss": 1.111, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.14794520547945206, |
|
"grad_norm": 0.2188548892736435, |
|
"learning_rate": 8.547945205479454e-06, |
|
"loss": 1.1413, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.1506849315068493, |
|
"grad_norm": 0.1974693238735199, |
|
"learning_rate": 8.520547945205481e-06, |
|
"loss": 1.1219, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.15342465753424658, |
|
"grad_norm": 0.23297210037708282, |
|
"learning_rate": 8.493150684931507e-06, |
|
"loss": 1.1239, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.15616438356164383, |
|
"grad_norm": 0.18964549899101257, |
|
"learning_rate": 8.465753424657535e-06, |
|
"loss": 1.1562, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.1589041095890411, |
|
"grad_norm": 0.3689546287059784, |
|
"learning_rate": 8.438356164383562e-06, |
|
"loss": 1.1757, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.16164383561643836, |
|
"grad_norm": 0.20564478635787964, |
|
"learning_rate": 8.41095890410959e-06, |
|
"loss": 1.1523, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.1643835616438356, |
|
"grad_norm": 0.18170687556266785, |
|
"learning_rate": 8.383561643835617e-06, |
|
"loss": 1.0871, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.16712328767123288, |
|
"grad_norm": 0.20843440294265747, |
|
"learning_rate": 8.356164383561644e-06, |
|
"loss": 1.0996, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.16986301369863013, |
|
"grad_norm": 0.19425469636917114, |
|
"learning_rate": 8.328767123287672e-06, |
|
"loss": 1.0944, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.1726027397260274, |
|
"grad_norm": 0.20083212852478027, |
|
"learning_rate": 8.3013698630137e-06, |
|
"loss": 1.0991, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.17534246575342466, |
|
"grad_norm": 0.25054052472114563, |
|
"learning_rate": 8.273972602739727e-06, |
|
"loss": 1.1203, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.1780821917808219, |
|
"grad_norm": 0.20299087464809418, |
|
"learning_rate": 8.246575342465754e-06, |
|
"loss": 1.1124, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.18082191780821918, |
|
"grad_norm": 0.1804703027009964, |
|
"learning_rate": 8.219178082191782e-06, |
|
"loss": 1.1004, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.18356164383561643, |
|
"grad_norm": 0.18266041576862335, |
|
"learning_rate": 8.19178082191781e-06, |
|
"loss": 1.0923, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.1863013698630137, |
|
"grad_norm": 0.20351582765579224, |
|
"learning_rate": 8.164383561643837e-06, |
|
"loss": 1.0851, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.18904109589041096, |
|
"grad_norm": 0.1884656846523285, |
|
"learning_rate": 8.136986301369864e-06, |
|
"loss": 1.1031, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.1917808219178082, |
|
"grad_norm": 0.18246056139469147, |
|
"learning_rate": 8.109589041095892e-06, |
|
"loss": 1.0611, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.19452054794520549, |
|
"grad_norm": 0.2124541848897934, |
|
"learning_rate": 8.082191780821919e-06, |
|
"loss": 1.1488, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.19726027397260273, |
|
"grad_norm": 0.18398146331310272, |
|
"learning_rate": 8.054794520547946e-06, |
|
"loss": 1.0768, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.18915311992168427, |
|
"learning_rate": 8.027397260273974e-06, |
|
"loss": 1.0895, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.20273972602739726, |
|
"grad_norm": 0.2762850224971771, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 1.0683, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.2054794520547945, |
|
"grad_norm": 0.19815456867218018, |
|
"learning_rate": 7.972602739726027e-06, |
|
"loss": 1.0795, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.20821917808219179, |
|
"grad_norm": 0.19643427431583405, |
|
"learning_rate": 7.945205479452055e-06, |
|
"loss": 1.0699, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.21095890410958903, |
|
"grad_norm": 0.18533943593502045, |
|
"learning_rate": 7.917808219178082e-06, |
|
"loss": 1.0387, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.2136986301369863, |
|
"grad_norm": 0.16974703967571259, |
|
"learning_rate": 7.89041095890411e-06, |
|
"loss": 1.0622, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.21643835616438356, |
|
"grad_norm": 0.1869923621416092, |
|
"learning_rate": 7.863013698630137e-06, |
|
"loss": 1.0452, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.2191780821917808, |
|
"grad_norm": 0.1884332299232483, |
|
"learning_rate": 7.835616438356164e-06, |
|
"loss": 1.0677, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.2219178082191781, |
|
"grad_norm": 0.1671186238527298, |
|
"learning_rate": 7.808219178082192e-06, |
|
"loss": 1.049, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.22465753424657534, |
|
"grad_norm": 0.16851425170898438, |
|
"learning_rate": 7.78082191780822e-06, |
|
"loss": 1.0201, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.2273972602739726, |
|
"grad_norm": 0.1602688729763031, |
|
"learning_rate": 7.753424657534248e-06, |
|
"loss": 1.0427, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.23013698630136986, |
|
"grad_norm": 0.1668243110179901, |
|
"learning_rate": 7.726027397260276e-06, |
|
"loss": 1.0489, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.2328767123287671, |
|
"grad_norm": 0.1518656611442566, |
|
"learning_rate": 7.698630136986302e-06, |
|
"loss": 1.0009, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.2356164383561644, |
|
"grad_norm": 0.18698401749134064, |
|
"learning_rate": 7.671232876712329e-06, |
|
"loss": 1.0506, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.23835616438356164, |
|
"grad_norm": 0.1885562390089035, |
|
"learning_rate": 7.643835616438356e-06, |
|
"loss": 1.0571, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.2410958904109589, |
|
"grad_norm": 0.21512660384178162, |
|
"learning_rate": 7.616438356164384e-06, |
|
"loss": 1.0736, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.24383561643835616, |
|
"grad_norm": 0.19190873205661774, |
|
"learning_rate": 7.589041095890411e-06, |
|
"loss": 1.0291, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.2465753424657534, |
|
"grad_norm": 0.16971570253372192, |
|
"learning_rate": 7.561643835616439e-06, |
|
"loss": 1.0251, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.2493150684931507, |
|
"grad_norm": 0.16948704421520233, |
|
"learning_rate": 7.534246575342466e-06, |
|
"loss": 1.0073, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.25205479452054796, |
|
"grad_norm": 0.15802790224552155, |
|
"learning_rate": 7.506849315068494e-06, |
|
"loss": 1.0061, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.2547945205479452, |
|
"grad_norm": 0.18227992951869965, |
|
"learning_rate": 7.479452054794521e-06, |
|
"loss": 1.0229, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.25753424657534246, |
|
"grad_norm": 0.1823369711637497, |
|
"learning_rate": 7.452054794520549e-06, |
|
"loss": 1.0278, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.2602739726027397, |
|
"grad_norm": 0.17380163073539734, |
|
"learning_rate": 7.424657534246575e-06, |
|
"loss": 1.021, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.26301369863013696, |
|
"grad_norm": 0.19462458789348602, |
|
"learning_rate": 7.397260273972603e-06, |
|
"loss": 1.0219, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.26575342465753427, |
|
"grad_norm": 0.23451410233974457, |
|
"learning_rate": 7.36986301369863e-06, |
|
"loss": 1.0039, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.2684931506849315, |
|
"grad_norm": 0.1824021339416504, |
|
"learning_rate": 7.342465753424658e-06, |
|
"loss": 1.014, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.27123287671232876, |
|
"grad_norm": 0.21500590443611145, |
|
"learning_rate": 7.315068493150685e-06, |
|
"loss": 1.0275, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.273972602739726, |
|
"grad_norm": 0.2002096176147461, |
|
"learning_rate": 7.287671232876713e-06, |
|
"loss": 1.0077, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.27671232876712326, |
|
"grad_norm": 0.19200125336647034, |
|
"learning_rate": 7.260273972602741e-06, |
|
"loss": 1.0379, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.27945205479452057, |
|
"grad_norm": 0.19409216940402985, |
|
"learning_rate": 7.232876712328768e-06, |
|
"loss": 1.0256, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.2821917808219178, |
|
"grad_norm": 0.21779859066009521, |
|
"learning_rate": 7.205479452054796e-06, |
|
"loss": 1.0345, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.28493150684931506, |
|
"grad_norm": 0.18616290390491486, |
|
"learning_rate": 7.178082191780823e-06, |
|
"loss": 0.9895, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.2876712328767123, |
|
"grad_norm": 0.19296467304229736, |
|
"learning_rate": 7.15068493150685e-06, |
|
"loss": 1.0096, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.29041095890410956, |
|
"grad_norm": 0.1683327853679657, |
|
"learning_rate": 7.123287671232877e-06, |
|
"loss": 0.9636, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.29315068493150687, |
|
"grad_norm": 0.17978879809379578, |
|
"learning_rate": 7.095890410958905e-06, |
|
"loss": 0.9975, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.2958904109589041, |
|
"grad_norm": 0.16575272381305695, |
|
"learning_rate": 7.068493150684932e-06, |
|
"loss": 1.0139, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.29863013698630136, |
|
"grad_norm": 0.17764893174171448, |
|
"learning_rate": 7.0410958904109596e-06, |
|
"loss": 1.0325, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.3013698630136986, |
|
"grad_norm": 0.17832903563976288, |
|
"learning_rate": 7.013698630136987e-06, |
|
"loss": 0.992, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.3041095890410959, |
|
"grad_norm": 0.16333316266536713, |
|
"learning_rate": 6.9863013698630145e-06, |
|
"loss": 0.9666, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.30684931506849317, |
|
"grad_norm": 0.18412047624588013, |
|
"learning_rate": 6.958904109589042e-06, |
|
"loss": 0.9947, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.3095890410958904, |
|
"grad_norm": 0.1641516238451004, |
|
"learning_rate": 6.931506849315069e-06, |
|
"loss": 0.9906, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.31232876712328766, |
|
"grad_norm": 0.18838484585285187, |
|
"learning_rate": 6.904109589041097e-06, |
|
"loss": 0.9712, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.3150684931506849, |
|
"grad_norm": 0.2712058424949646, |
|
"learning_rate": 6.876712328767123e-06, |
|
"loss": 0.966, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.3178082191780822, |
|
"grad_norm": 0.18930639326572418, |
|
"learning_rate": 6.849315068493151e-06, |
|
"loss": 1.0194, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.32054794520547947, |
|
"grad_norm": 0.19463950395584106, |
|
"learning_rate": 6.821917808219178e-06, |
|
"loss": 0.946, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.3232876712328767, |
|
"grad_norm": 0.18198105692863464, |
|
"learning_rate": 6.794520547945206e-06, |
|
"loss": 0.9759, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.32602739726027397, |
|
"grad_norm": 0.1826743334531784, |
|
"learning_rate": 6.767123287671233e-06, |
|
"loss": 1.0002, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.3287671232876712, |
|
"grad_norm": 0.22585777938365936, |
|
"learning_rate": 6.739726027397261e-06, |
|
"loss": 1.0071, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.3315068493150685, |
|
"grad_norm": 0.176513671875, |
|
"learning_rate": 6.712328767123288e-06, |
|
"loss": 0.978, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.33424657534246577, |
|
"grad_norm": 0.19568510353565216, |
|
"learning_rate": 6.684931506849316e-06, |
|
"loss": 0.9803, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.336986301369863, |
|
"grad_norm": 0.1907186359167099, |
|
"learning_rate": 6.657534246575343e-06, |
|
"loss": 0.9948, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.33972602739726027, |
|
"grad_norm": 0.21716919541358948, |
|
"learning_rate": 6.630136986301371e-06, |
|
"loss": 0.9942, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.3424657534246575, |
|
"grad_norm": 0.1713164895772934, |
|
"learning_rate": 6.602739726027397e-06, |
|
"loss": 0.9707, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.3452054794520548, |
|
"grad_norm": 0.4523903429508209, |
|
"learning_rate": 6.5753424657534245e-06, |
|
"loss": 0.975, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.34794520547945207, |
|
"grad_norm": 0.17415928840637207, |
|
"learning_rate": 6.547945205479452e-06, |
|
"loss": 0.9843, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.3506849315068493, |
|
"grad_norm": 0.23172074556350708, |
|
"learning_rate": 6.5205479452054794e-06, |
|
"loss": 1.0038, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.35342465753424657, |
|
"grad_norm": 0.1981474608182907, |
|
"learning_rate": 6.493150684931508e-06, |
|
"loss": 0.9421, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.3561643835616438, |
|
"grad_norm": 0.20540186762809753, |
|
"learning_rate": 6.465753424657535e-06, |
|
"loss": 1.0132, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.3589041095890411, |
|
"grad_norm": 0.19228605926036835, |
|
"learning_rate": 6.438356164383563e-06, |
|
"loss": 0.992, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.36164383561643837, |
|
"grad_norm": 0.26882854104042053, |
|
"learning_rate": 6.41095890410959e-06, |
|
"loss": 0.9719, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.3643835616438356, |
|
"grad_norm": 0.18118484318256378, |
|
"learning_rate": 6.3835616438356175e-06, |
|
"loss": 0.9701, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.36712328767123287, |
|
"grad_norm": 0.1885877102613449, |
|
"learning_rate": 6.356164383561645e-06, |
|
"loss": 0.968, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.3698630136986301, |
|
"grad_norm": 0.25495269894599915, |
|
"learning_rate": 6.328767123287672e-06, |
|
"loss": 0.9855, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.3726027397260274, |
|
"grad_norm": 0.2146862894296646, |
|
"learning_rate": 6.301369863013699e-06, |
|
"loss": 0.9661, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.37534246575342467, |
|
"grad_norm": 0.22623310983181, |
|
"learning_rate": 6.2739726027397265e-06, |
|
"loss": 0.9855, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.3780821917808219, |
|
"grad_norm": 0.1829495131969452, |
|
"learning_rate": 6.246575342465754e-06, |
|
"loss": 0.9826, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.38082191780821917, |
|
"grad_norm": 0.18838265538215637, |
|
"learning_rate": 6.219178082191781e-06, |
|
"loss": 0.9738, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.3835616438356164, |
|
"grad_norm": 0.18574169278144836, |
|
"learning_rate": 6.191780821917809e-06, |
|
"loss": 0.9629, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.3863013698630137, |
|
"grad_norm": 0.18983891606330872, |
|
"learning_rate": 6.164383561643836e-06, |
|
"loss": 0.9921, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.38904109589041097, |
|
"grad_norm": 0.18077009916305542, |
|
"learning_rate": 6.136986301369864e-06, |
|
"loss": 0.9306, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.3917808219178082, |
|
"grad_norm": 0.1796853393316269, |
|
"learning_rate": 6.109589041095891e-06, |
|
"loss": 0.9573, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.39452054794520547, |
|
"grad_norm": 0.1963498443365097, |
|
"learning_rate": 6.082191780821919e-06, |
|
"loss": 0.9955, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.3972602739726027, |
|
"grad_norm": 0.21260827779769897, |
|
"learning_rate": 6.054794520547945e-06, |
|
"loss": 0.9504, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.17806194722652435, |
|
"learning_rate": 6.027397260273973e-06, |
|
"loss": 0.973, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.40273972602739727, |
|
"grad_norm": 0.2055845409631729, |
|
"learning_rate": 6e-06, |
|
"loss": 0.9075, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.4054794520547945, |
|
"grad_norm": 0.22208715975284576, |
|
"learning_rate": 5.972602739726028e-06, |
|
"loss": 0.9425, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.40821917808219177, |
|
"grad_norm": 0.20323306322097778, |
|
"learning_rate": 5.945205479452055e-06, |
|
"loss": 0.9592, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.410958904109589, |
|
"grad_norm": 0.20329202711582184, |
|
"learning_rate": 5.9178082191780825e-06, |
|
"loss": 1.0133, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.4136986301369863, |
|
"grad_norm": 0.20176365971565247, |
|
"learning_rate": 5.89041095890411e-06, |
|
"loss": 0.9754, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.41643835616438357, |
|
"grad_norm": 0.23288728296756744, |
|
"learning_rate": 5.863013698630137e-06, |
|
"loss": 0.9037, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.4191780821917808, |
|
"grad_norm": 0.18577046692371368, |
|
"learning_rate": 5.835616438356166e-06, |
|
"loss": 0.9741, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.42191780821917807, |
|
"grad_norm": 0.1814967393875122, |
|
"learning_rate": 5.8082191780821915e-06, |
|
"loss": 0.9531, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.4246575342465753, |
|
"grad_norm": 0.21100261807441711, |
|
"learning_rate": 5.780821917808219e-06, |
|
"loss": 0.9953, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.4273972602739726, |
|
"grad_norm": 0.18429897725582123, |
|
"learning_rate": 5.753424657534246e-06, |
|
"loss": 0.9436, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.4301369863013699, |
|
"grad_norm": 0.23114976286888123, |
|
"learning_rate": 5.726027397260274e-06, |
|
"loss": 0.998, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.4328767123287671, |
|
"grad_norm": 0.21077217161655426, |
|
"learning_rate": 5.698630136986302e-06, |
|
"loss": 0.9764, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.43561643835616437, |
|
"grad_norm": 0.22999361157417297, |
|
"learning_rate": 5.6712328767123296e-06, |
|
"loss": 0.9563, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.4383561643835616, |
|
"grad_norm": 0.19848570227622986, |
|
"learning_rate": 5.643835616438357e-06, |
|
"loss": 0.9689, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.4410958904109589, |
|
"grad_norm": 0.18221735954284668, |
|
"learning_rate": 5.6164383561643845e-06, |
|
"loss": 0.9626, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.4438356164383562, |
|
"grad_norm": 0.1988019198179245, |
|
"learning_rate": 5.589041095890412e-06, |
|
"loss": 0.9601, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.4465753424657534, |
|
"grad_norm": 0.19045473635196686, |
|
"learning_rate": 5.561643835616439e-06, |
|
"loss": 0.9131, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.44931506849315067, |
|
"grad_norm": 0.19012993574142456, |
|
"learning_rate": 5.534246575342466e-06, |
|
"loss": 0.9433, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.4520547945205479, |
|
"grad_norm": 0.24261914193630219, |
|
"learning_rate": 5.506849315068493e-06, |
|
"loss": 0.9481, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.4547945205479452, |
|
"grad_norm": 0.20674464106559753, |
|
"learning_rate": 5.479452054794521e-06, |
|
"loss": 0.9425, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.4575342465753425, |
|
"grad_norm": 0.33073902130126953, |
|
"learning_rate": 5.452054794520548e-06, |
|
"loss": 0.9725, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.4602739726027397, |
|
"grad_norm": 0.2034877985715866, |
|
"learning_rate": 5.424657534246576e-06, |
|
"loss": 0.9583, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.46301369863013697, |
|
"grad_norm": 0.2041964828968048, |
|
"learning_rate": 5.397260273972603e-06, |
|
"loss": 0.9782, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.4657534246575342, |
|
"grad_norm": 0.19201287627220154, |
|
"learning_rate": 5.369863013698631e-06, |
|
"loss": 0.9079, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.4684931506849315, |
|
"grad_norm": 0.26675060391426086, |
|
"learning_rate": 5.342465753424658e-06, |
|
"loss": 0.9524, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.4712328767123288, |
|
"grad_norm": 0.23858384788036346, |
|
"learning_rate": 5.3150684931506856e-06, |
|
"loss": 0.9862, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.473972602739726, |
|
"grad_norm": 0.23285381495952606, |
|
"learning_rate": 5.287671232876713e-06, |
|
"loss": 0.9161, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.4767123287671233, |
|
"grad_norm": 0.25506582856178284, |
|
"learning_rate": 5.26027397260274e-06, |
|
"loss": 0.9871, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.4794520547945205, |
|
"grad_norm": 0.23983027040958405, |
|
"learning_rate": 5.232876712328767e-06, |
|
"loss": 0.9771, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.4821917808219178, |
|
"grad_norm": 0.2238221913576126, |
|
"learning_rate": 5.2054794520547945e-06, |
|
"loss": 0.9671, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.4849315068493151, |
|
"grad_norm": 0.19868263602256775, |
|
"learning_rate": 5.178082191780822e-06, |
|
"loss": 0.9489, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.4876712328767123, |
|
"grad_norm": 0.20326636731624603, |
|
"learning_rate": 5.1506849315068494e-06, |
|
"loss": 0.935, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.4904109589041096, |
|
"grad_norm": 0.3353458344936371, |
|
"learning_rate": 5.123287671232877e-06, |
|
"loss": 0.8703, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.4931506849315068, |
|
"grad_norm": 0.2603074610233307, |
|
"learning_rate": 5.095890410958904e-06, |
|
"loss": 0.9012, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.4958904109589041, |
|
"grad_norm": 0.19554010033607483, |
|
"learning_rate": 5.068493150684932e-06, |
|
"loss": 0.9156, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.4986301369863014, |
|
"grad_norm": 0.22466544806957245, |
|
"learning_rate": 5.04109589041096e-06, |
|
"loss": 0.9597, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.5013698630136987, |
|
"grad_norm": 0.19898617267608643, |
|
"learning_rate": 5.0136986301369875e-06, |
|
"loss": 0.9036, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.5041095890410959, |
|
"grad_norm": 0.21986106038093567, |
|
"learning_rate": 4.986301369863014e-06, |
|
"loss": 0.9262, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.5068493150684932, |
|
"grad_norm": 0.20118722319602966, |
|
"learning_rate": 4.958904109589042e-06, |
|
"loss": 0.9374, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.5095890410958904, |
|
"grad_norm": 0.21291136741638184, |
|
"learning_rate": 4.931506849315069e-06, |
|
"loss": 0.949, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.5123287671232877, |
|
"grad_norm": 0.199756920337677, |
|
"learning_rate": 4.9041095890410965e-06, |
|
"loss": 0.9584, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.5150684931506849, |
|
"grad_norm": 0.21030659973621368, |
|
"learning_rate": 4.876712328767124e-06, |
|
"loss": 0.9634, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.5178082191780822, |
|
"grad_norm": 0.2223595529794693, |
|
"learning_rate": 4.849315068493151e-06, |
|
"loss": 0.9531, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.5205479452054794, |
|
"grad_norm": 0.20592337846755981, |
|
"learning_rate": 4.821917808219179e-06, |
|
"loss": 0.9144, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.5232876712328767, |
|
"grad_norm": 0.21392260491847992, |
|
"learning_rate": 4.7945205479452054e-06, |
|
"loss": 0.9564, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.5260273972602739, |
|
"grad_norm": 0.20511546730995178, |
|
"learning_rate": 4.767123287671233e-06, |
|
"loss": 0.949, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.5287671232876713, |
|
"grad_norm": 0.23420880734920502, |
|
"learning_rate": 4.73972602739726e-06, |
|
"loss": 0.9267, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.5315068493150685, |
|
"grad_norm": 0.20569054782390594, |
|
"learning_rate": 4.712328767123288e-06, |
|
"loss": 0.9576, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.5342465753424658, |
|
"grad_norm": 0.21940301358699799, |
|
"learning_rate": 4.684931506849315e-06, |
|
"loss": 0.9238, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.536986301369863, |
|
"grad_norm": 0.21618136763572693, |
|
"learning_rate": 4.657534246575343e-06, |
|
"loss": 0.9185, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.5397260273972603, |
|
"grad_norm": 0.21503998339176178, |
|
"learning_rate": 4.63013698630137e-06, |
|
"loss": 0.9709, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.5424657534246575, |
|
"grad_norm": 0.20396368205547333, |
|
"learning_rate": 4.602739726027398e-06, |
|
"loss": 0.9, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.5452054794520548, |
|
"grad_norm": 0.21770261228084564, |
|
"learning_rate": 4.575342465753425e-06, |
|
"loss": 0.9273, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.547945205479452, |
|
"grad_norm": 0.32311683893203735, |
|
"learning_rate": 4.5479452054794525e-06, |
|
"loss": 0.929, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.5506849315068493, |
|
"grad_norm": 0.19623324275016785, |
|
"learning_rate": 4.52054794520548e-06, |
|
"loss": 0.9378, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.5534246575342465, |
|
"grad_norm": 0.20517247915267944, |
|
"learning_rate": 4.493150684931507e-06, |
|
"loss": 0.9394, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.5561643835616439, |
|
"grad_norm": 0.20876652002334595, |
|
"learning_rate": 4.465753424657535e-06, |
|
"loss": 0.9161, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.5589041095890411, |
|
"grad_norm": 0.2046094387769699, |
|
"learning_rate": 4.438356164383562e-06, |
|
"loss": 0.9408, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.5616438356164384, |
|
"grad_norm": 0.20203807950019836, |
|
"learning_rate": 4.41095890410959e-06, |
|
"loss": 0.9082, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.5643835616438356, |
|
"grad_norm": 0.2046758085489273, |
|
"learning_rate": 4.383561643835616e-06, |
|
"loss": 0.9282, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.5671232876712329, |
|
"grad_norm": 0.3580210208892822, |
|
"learning_rate": 4.356164383561644e-06, |
|
"loss": 0.9197, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.5698630136986301, |
|
"grad_norm": 0.20845644176006317, |
|
"learning_rate": 4.328767123287671e-06, |
|
"loss": 0.9179, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.5726027397260274, |
|
"grad_norm": 0.3450890779495239, |
|
"learning_rate": 4.301369863013699e-06, |
|
"loss": 0.9177, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.5753424657534246, |
|
"grad_norm": 0.22718127071857452, |
|
"learning_rate": 4.273972602739727e-06, |
|
"loss": 0.9096, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.5780821917808219, |
|
"grad_norm": 0.22152277827262878, |
|
"learning_rate": 4.246575342465754e-06, |
|
"loss": 0.9597, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.5808219178082191, |
|
"grad_norm": 0.27429115772247314, |
|
"learning_rate": 4.219178082191781e-06, |
|
"loss": 0.9241, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.5835616438356165, |
|
"grad_norm": 0.25996771454811096, |
|
"learning_rate": 4.1917808219178085e-06, |
|
"loss": 0.9333, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.5863013698630137, |
|
"grad_norm": 0.23975831270217896, |
|
"learning_rate": 4.164383561643836e-06, |
|
"loss": 0.9241, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.589041095890411, |
|
"grad_norm": 0.22331544756889343, |
|
"learning_rate": 4.136986301369863e-06, |
|
"loss": 0.9479, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.5917808219178082, |
|
"grad_norm": 0.45686569809913635, |
|
"learning_rate": 4.109589041095891e-06, |
|
"loss": 0.9185, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.5945205479452055, |
|
"grad_norm": 0.22199726104736328, |
|
"learning_rate": 4.082191780821918e-06, |
|
"loss": 0.938, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.5972602739726027, |
|
"grad_norm": 0.23028190433979034, |
|
"learning_rate": 4.054794520547946e-06, |
|
"loss": 0.889, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.20550380647182465, |
|
"learning_rate": 4.027397260273973e-06, |
|
"loss": 0.8967, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.6027397260273972, |
|
"grad_norm": 0.2272365838289261, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 0.8845, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.6054794520547945, |
|
"grad_norm": 0.21718665957450867, |
|
"learning_rate": 3.972602739726027e-06, |
|
"loss": 0.9252, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.6082191780821918, |
|
"grad_norm": 0.36865755915641785, |
|
"learning_rate": 3.945205479452055e-06, |
|
"loss": 0.8968, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.6109589041095891, |
|
"grad_norm": 0.31960830092430115, |
|
"learning_rate": 3.917808219178082e-06, |
|
"loss": 0.9072, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.6136986301369863, |
|
"grad_norm": 0.20890721678733826, |
|
"learning_rate": 3.89041095890411e-06, |
|
"loss": 0.9113, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.6164383561643836, |
|
"grad_norm": 0.4451903700828552, |
|
"learning_rate": 3.863013698630138e-06, |
|
"loss": 0.9219, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.6191780821917808, |
|
"grad_norm": 0.2214295119047165, |
|
"learning_rate": 3.8356164383561645e-06, |
|
"loss": 0.9211, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.6219178082191781, |
|
"grad_norm": 0.22712253034114838, |
|
"learning_rate": 3.808219178082192e-06, |
|
"loss": 0.9375, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.6246575342465753, |
|
"grad_norm": 0.23424097895622253, |
|
"learning_rate": 3.7808219178082194e-06, |
|
"loss": 0.941, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.6273972602739726, |
|
"grad_norm": 0.24005015194416046, |
|
"learning_rate": 3.753424657534247e-06, |
|
"loss": 0.9225, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.6301369863013698, |
|
"grad_norm": 0.22683826088905334, |
|
"learning_rate": 3.7260273972602743e-06, |
|
"loss": 0.9281, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.6328767123287671, |
|
"grad_norm": 0.22614328563213348, |
|
"learning_rate": 3.6986301369863014e-06, |
|
"loss": 0.9527, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.6356164383561644, |
|
"grad_norm": 0.21004171669483185, |
|
"learning_rate": 3.671232876712329e-06, |
|
"loss": 0.9169, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.6383561643835617, |
|
"grad_norm": 0.24667370319366455, |
|
"learning_rate": 3.6438356164383567e-06, |
|
"loss": 0.9443, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.6410958904109589, |
|
"grad_norm": 0.21378229558467865, |
|
"learning_rate": 3.616438356164384e-06, |
|
"loss": 0.9103, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.6438356164383562, |
|
"grad_norm": 0.2116130292415619, |
|
"learning_rate": 3.5890410958904116e-06, |
|
"loss": 0.9478, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.6465753424657534, |
|
"grad_norm": 0.21133503317832947, |
|
"learning_rate": 3.5616438356164386e-06, |
|
"loss": 0.9019, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.6493150684931507, |
|
"grad_norm": 0.2155924141407013, |
|
"learning_rate": 3.534246575342466e-06, |
|
"loss": 0.9044, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.6520547945205479, |
|
"grad_norm": 0.2544390857219696, |
|
"learning_rate": 3.5068493150684935e-06, |
|
"loss": 0.8982, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.6547945205479452, |
|
"grad_norm": 0.23489639163017273, |
|
"learning_rate": 3.479452054794521e-06, |
|
"loss": 0.9164, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.6575342465753424, |
|
"grad_norm": 0.2359784096479416, |
|
"learning_rate": 3.4520547945205484e-06, |
|
"loss": 0.9289, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.6602739726027397, |
|
"grad_norm": 0.2401346117258072, |
|
"learning_rate": 3.4246575342465754e-06, |
|
"loss": 0.9314, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.663013698630137, |
|
"grad_norm": 0.20693626999855042, |
|
"learning_rate": 3.397260273972603e-06, |
|
"loss": 0.8831, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.6657534246575343, |
|
"grad_norm": 0.24384048581123352, |
|
"learning_rate": 3.3698630136986303e-06, |
|
"loss": 0.9291, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.6684931506849315, |
|
"grad_norm": 0.19972023367881775, |
|
"learning_rate": 3.342465753424658e-06, |
|
"loss": 0.8982, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.6712328767123288, |
|
"grad_norm": 0.20908920466899872, |
|
"learning_rate": 3.3150684931506857e-06, |
|
"loss": 0.889, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.673972602739726, |
|
"grad_norm": 0.34266164898872375, |
|
"learning_rate": 3.2876712328767123e-06, |
|
"loss": 0.8969, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.6767123287671233, |
|
"grad_norm": 0.22128839790821075, |
|
"learning_rate": 3.2602739726027397e-06, |
|
"loss": 0.9223, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.6794520547945205, |
|
"grad_norm": 0.2235099971294403, |
|
"learning_rate": 3.2328767123287676e-06, |
|
"loss": 0.8973, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.6821917808219178, |
|
"grad_norm": 0.25056037306785583, |
|
"learning_rate": 3.205479452054795e-06, |
|
"loss": 0.8951, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.684931506849315, |
|
"grad_norm": 0.22579753398895264, |
|
"learning_rate": 3.1780821917808225e-06, |
|
"loss": 0.9295, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.6876712328767123, |
|
"grad_norm": 0.21365728974342346, |
|
"learning_rate": 3.1506849315068495e-06, |
|
"loss": 0.9214, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.6904109589041096, |
|
"grad_norm": 0.39435452222824097, |
|
"learning_rate": 3.123287671232877e-06, |
|
"loss": 0.9602, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.6931506849315069, |
|
"grad_norm": 0.2443501502275467, |
|
"learning_rate": 3.0958904109589044e-06, |
|
"loss": 0.9151, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.6958904109589041, |
|
"grad_norm": 0.3798341751098633, |
|
"learning_rate": 3.068493150684932e-06, |
|
"loss": 0.9714, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.6986301369863014, |
|
"grad_norm": 0.31506481766700745, |
|
"learning_rate": 3.0410958904109593e-06, |
|
"loss": 0.9068, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.7013698630136986, |
|
"grad_norm": 0.22401128709316254, |
|
"learning_rate": 3.0136986301369864e-06, |
|
"loss": 0.8482, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.7041095890410959, |
|
"grad_norm": 0.22684776782989502, |
|
"learning_rate": 2.986301369863014e-06, |
|
"loss": 0.937, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.7068493150684931, |
|
"grad_norm": 0.21728773415088654, |
|
"learning_rate": 2.9589041095890413e-06, |
|
"loss": 0.8649, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.7095890410958904, |
|
"grad_norm": 0.21507151424884796, |
|
"learning_rate": 2.9315068493150687e-06, |
|
"loss": 0.8983, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.7123287671232876, |
|
"grad_norm": 0.22401422262191772, |
|
"learning_rate": 2.9041095890410957e-06, |
|
"loss": 0.9257, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.7150684931506849, |
|
"grad_norm": 0.24318191409111023, |
|
"learning_rate": 2.876712328767123e-06, |
|
"loss": 0.9562, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.7178082191780822, |
|
"grad_norm": 0.34791508316993713, |
|
"learning_rate": 2.849315068493151e-06, |
|
"loss": 0.9106, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.7205479452054795, |
|
"grad_norm": 0.21434104442596436, |
|
"learning_rate": 2.8219178082191785e-06, |
|
"loss": 0.9017, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.7232876712328767, |
|
"grad_norm": 0.23609629273414612, |
|
"learning_rate": 2.794520547945206e-06, |
|
"loss": 0.9149, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.726027397260274, |
|
"grad_norm": 0.22214727103710175, |
|
"learning_rate": 2.767123287671233e-06, |
|
"loss": 0.9134, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.7287671232876712, |
|
"grad_norm": 0.22469589114189148, |
|
"learning_rate": 2.7397260273972604e-06, |
|
"loss": 0.9292, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.7315068493150685, |
|
"grad_norm": 0.20773912966251373, |
|
"learning_rate": 2.712328767123288e-06, |
|
"loss": 0.8922, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.7342465753424657, |
|
"grad_norm": 0.2090543806552887, |
|
"learning_rate": 2.6849315068493153e-06, |
|
"loss": 0.8949, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.736986301369863, |
|
"grad_norm": 0.2849046289920807, |
|
"learning_rate": 2.6575342465753428e-06, |
|
"loss": 0.8872, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.7397260273972602, |
|
"grad_norm": 0.20804154872894287, |
|
"learning_rate": 2.63013698630137e-06, |
|
"loss": 0.886, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.7424657534246575, |
|
"grad_norm": 0.21984834969043732, |
|
"learning_rate": 2.6027397260273973e-06, |
|
"loss": 0.9071, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.7452054794520548, |
|
"grad_norm": 0.2162293642759323, |
|
"learning_rate": 2.5753424657534247e-06, |
|
"loss": 0.9256, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.7479452054794521, |
|
"grad_norm": 0.32106906175613403, |
|
"learning_rate": 2.547945205479452e-06, |
|
"loss": 0.8578, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.7506849315068493, |
|
"grad_norm": 0.2233538180589676, |
|
"learning_rate": 2.52054794520548e-06, |
|
"loss": 0.9269, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.7534246575342466, |
|
"grad_norm": 0.2110017091035843, |
|
"learning_rate": 2.493150684931507e-06, |
|
"loss": 0.89, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.7561643835616438, |
|
"grad_norm": 0.3380037546157837, |
|
"learning_rate": 2.4657534246575345e-06, |
|
"loss": 0.9096, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.7589041095890411, |
|
"grad_norm": 0.21267248690128326, |
|
"learning_rate": 2.438356164383562e-06, |
|
"loss": 0.9168, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.7616438356164383, |
|
"grad_norm": 0.23698973655700684, |
|
"learning_rate": 2.4109589041095894e-06, |
|
"loss": 0.9307, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.7643835616438356, |
|
"grad_norm": 0.2794507145881653, |
|
"learning_rate": 2.3835616438356164e-06, |
|
"loss": 0.9187, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.7671232876712328, |
|
"grad_norm": 0.24848654866218567, |
|
"learning_rate": 2.356164383561644e-06, |
|
"loss": 0.8992, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.7698630136986301, |
|
"grad_norm": 0.2218470573425293, |
|
"learning_rate": 2.3287671232876713e-06, |
|
"loss": 0.9302, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.7726027397260274, |
|
"grad_norm": 0.22873690724372864, |
|
"learning_rate": 2.301369863013699e-06, |
|
"loss": 0.9113, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.7753424657534247, |
|
"grad_norm": 0.2365490347146988, |
|
"learning_rate": 2.2739726027397262e-06, |
|
"loss": 0.9052, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.7780821917808219, |
|
"grad_norm": 0.24085691571235657, |
|
"learning_rate": 2.2465753424657537e-06, |
|
"loss": 0.8646, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.7808219178082192, |
|
"grad_norm": 0.33487242460250854, |
|
"learning_rate": 2.219178082191781e-06, |
|
"loss": 0.9163, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.7835616438356164, |
|
"grad_norm": 0.2402915209531784, |
|
"learning_rate": 2.191780821917808e-06, |
|
"loss": 0.9007, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.7863013698630137, |
|
"grad_norm": 0.2752298414707184, |
|
"learning_rate": 2.1643835616438356e-06, |
|
"loss": 0.9113, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.7890410958904109, |
|
"grad_norm": 0.2232140451669693, |
|
"learning_rate": 2.1369863013698635e-06, |
|
"loss": 0.9074, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.7917808219178082, |
|
"grad_norm": 0.2432282269001007, |
|
"learning_rate": 2.1095890410958905e-06, |
|
"loss": 0.9163, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.7945205479452054, |
|
"grad_norm": 0.2625906467437744, |
|
"learning_rate": 2.082191780821918e-06, |
|
"loss": 0.9018, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.7972602739726027, |
|
"grad_norm": 0.22145821154117584, |
|
"learning_rate": 2.0547945205479454e-06, |
|
"loss": 0.891, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.3327736556529999, |
|
"learning_rate": 2.027397260273973e-06, |
|
"loss": 0.9484, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.8027397260273973, |
|
"grad_norm": 0.40401315689086914, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 0.9341, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.8054794520547945, |
|
"grad_norm": 0.2722069323062897, |
|
"learning_rate": 1.9726027397260274e-06, |
|
"loss": 0.8993, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.8082191780821918, |
|
"grad_norm": 0.28469568490982056, |
|
"learning_rate": 1.945205479452055e-06, |
|
"loss": 0.8949, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.810958904109589, |
|
"grad_norm": 0.22787445783615112, |
|
"learning_rate": 1.9178082191780823e-06, |
|
"loss": 0.9105, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.8136986301369863, |
|
"grad_norm": 0.3806058168411255, |
|
"learning_rate": 1.8904109589041097e-06, |
|
"loss": 0.9281, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.8164383561643835, |
|
"grad_norm": 0.2987535297870636, |
|
"learning_rate": 1.8630136986301372e-06, |
|
"loss": 0.896, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.8191780821917808, |
|
"grad_norm": 0.22943800687789917, |
|
"learning_rate": 1.8356164383561644e-06, |
|
"loss": 0.8471, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.821917808219178, |
|
"grad_norm": 0.26283594965934753, |
|
"learning_rate": 1.808219178082192e-06, |
|
"loss": 0.9124, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.8246575342465754, |
|
"grad_norm": 0.2221825271844864, |
|
"learning_rate": 1.7808219178082193e-06, |
|
"loss": 0.9313, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.8273972602739726, |
|
"grad_norm": 0.24094314873218536, |
|
"learning_rate": 1.7534246575342468e-06, |
|
"loss": 0.9072, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.8301369863013699, |
|
"grad_norm": 0.28791072964668274, |
|
"learning_rate": 1.7260273972602742e-06, |
|
"loss": 0.9675, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.8328767123287671, |
|
"grad_norm": 0.36563360691070557, |
|
"learning_rate": 1.6986301369863014e-06, |
|
"loss": 0.9283, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.8356164383561644, |
|
"grad_norm": 0.2258368879556656, |
|
"learning_rate": 1.671232876712329e-06, |
|
"loss": 0.925, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.8383561643835616, |
|
"grad_norm": 0.2386874109506607, |
|
"learning_rate": 1.6438356164383561e-06, |
|
"loss": 0.8992, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.8410958904109589, |
|
"grad_norm": 0.22928331792354584, |
|
"learning_rate": 1.6164383561643838e-06, |
|
"loss": 0.852, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.8438356164383561, |
|
"grad_norm": 0.21329520642757416, |
|
"learning_rate": 1.5890410958904112e-06, |
|
"loss": 0.8784, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.8465753424657534, |
|
"grad_norm": 0.3607523739337921, |
|
"learning_rate": 1.5616438356164385e-06, |
|
"loss": 0.9591, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.8493150684931506, |
|
"grad_norm": 0.26816117763519287, |
|
"learning_rate": 1.534246575342466e-06, |
|
"loss": 0.9056, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.852054794520548, |
|
"grad_norm": 0.30916866660118103, |
|
"learning_rate": 1.5068493150684932e-06, |
|
"loss": 0.9043, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.8547945205479452, |
|
"grad_norm": 0.21805380284786224, |
|
"learning_rate": 1.4794520547945206e-06, |
|
"loss": 0.882, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.8575342465753425, |
|
"grad_norm": 0.2367381602525711, |
|
"learning_rate": 1.4520547945205479e-06, |
|
"loss": 0.8988, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.8602739726027397, |
|
"grad_norm": 0.368033766746521, |
|
"learning_rate": 1.4246575342465755e-06, |
|
"loss": 0.897, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.863013698630137, |
|
"grad_norm": 0.2315363585948944, |
|
"learning_rate": 1.397260273972603e-06, |
|
"loss": 0.8989, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.8657534246575342, |
|
"grad_norm": 0.21146391332149506, |
|
"learning_rate": 1.3698630136986302e-06, |
|
"loss": 0.9054, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.8684931506849315, |
|
"grad_norm": 0.22160471975803375, |
|
"learning_rate": 1.3424657534246577e-06, |
|
"loss": 0.9027, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.8712328767123287, |
|
"grad_norm": 0.2119206190109253, |
|
"learning_rate": 1.315068493150685e-06, |
|
"loss": 0.8721, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.873972602739726, |
|
"grad_norm": 0.2633841931819916, |
|
"learning_rate": 1.2876712328767124e-06, |
|
"loss": 0.9339, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.8767123287671232, |
|
"grad_norm": 0.21679936349391937, |
|
"learning_rate": 1.26027397260274e-06, |
|
"loss": 0.9214, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.8794520547945206, |
|
"grad_norm": 0.24844442307949066, |
|
"learning_rate": 1.2328767123287673e-06, |
|
"loss": 0.9308, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.8821917808219178, |
|
"grad_norm": 0.22435534000396729, |
|
"learning_rate": 1.2054794520547947e-06, |
|
"loss": 0.9167, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.8849315068493151, |
|
"grad_norm": 0.21692365407943726, |
|
"learning_rate": 1.178082191780822e-06, |
|
"loss": 0.9204, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.8876712328767123, |
|
"grad_norm": 0.22375880181789398, |
|
"learning_rate": 1.1506849315068494e-06, |
|
"loss": 0.8826, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.8904109589041096, |
|
"grad_norm": 0.22138787806034088, |
|
"learning_rate": 1.1232876712328769e-06, |
|
"loss": 0.8767, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.8931506849315068, |
|
"grad_norm": 0.22856874763965607, |
|
"learning_rate": 1.095890410958904e-06, |
|
"loss": 0.9276, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.8958904109589041, |
|
"grad_norm": 0.3409179449081421, |
|
"learning_rate": 1.0684931506849318e-06, |
|
"loss": 0.8985, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.8986301369863013, |
|
"grad_norm": 0.25110721588134766, |
|
"learning_rate": 1.041095890410959e-06, |
|
"loss": 0.9098, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.9013698630136986, |
|
"grad_norm": 0.25154635310173035, |
|
"learning_rate": 1.0136986301369864e-06, |
|
"loss": 0.8754, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.9041095890410958, |
|
"grad_norm": 0.25457221269607544, |
|
"learning_rate": 9.863013698630137e-07, |
|
"loss": 0.8919, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.9068493150684932, |
|
"grad_norm": 0.33413681387901306, |
|
"learning_rate": 9.589041095890411e-07, |
|
"loss": 0.9269, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.9095890410958904, |
|
"grad_norm": 0.23358331620693207, |
|
"learning_rate": 9.315068493150686e-07, |
|
"loss": 0.9136, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.9123287671232877, |
|
"grad_norm": 0.2589406967163086, |
|
"learning_rate": 9.04109589041096e-07, |
|
"loss": 0.9139, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.915068493150685, |
|
"grad_norm": 0.23020796477794647, |
|
"learning_rate": 8.767123287671234e-07, |
|
"loss": 0.9322, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.9178082191780822, |
|
"grad_norm": 0.23328816890716553, |
|
"learning_rate": 8.493150684931507e-07, |
|
"loss": 0.9158, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.9205479452054794, |
|
"grad_norm": 0.2274404615163803, |
|
"learning_rate": 8.219178082191781e-07, |
|
"loss": 0.8759, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.9232876712328767, |
|
"grad_norm": 0.2622356712818146, |
|
"learning_rate": 7.945205479452056e-07, |
|
"loss": 0.9109, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.9260273972602739, |
|
"grad_norm": 0.3171086311340332, |
|
"learning_rate": 7.67123287671233e-07, |
|
"loss": 0.9115, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.9287671232876712, |
|
"grad_norm": 0.22780168056488037, |
|
"learning_rate": 7.397260273972603e-07, |
|
"loss": 0.8892, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.9315068493150684, |
|
"grad_norm": 0.2200733870267868, |
|
"learning_rate": 7.123287671232878e-07, |
|
"loss": 0.9372, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.9342465753424658, |
|
"grad_norm": 0.23074880242347717, |
|
"learning_rate": 6.849315068493151e-07, |
|
"loss": 0.8906, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.936986301369863, |
|
"grad_norm": 0.22694838047027588, |
|
"learning_rate": 6.575342465753425e-07, |
|
"loss": 0.9225, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.9397260273972603, |
|
"grad_norm": 0.2811306118965149, |
|
"learning_rate": 6.3013698630137e-07, |
|
"loss": 0.8761, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.9424657534246575, |
|
"grad_norm": 0.2294805943965912, |
|
"learning_rate": 6.027397260273974e-07, |
|
"loss": 0.9325, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.9452054794520548, |
|
"grad_norm": 0.2327789068222046, |
|
"learning_rate": 5.753424657534247e-07, |
|
"loss": 0.925, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.947945205479452, |
|
"grad_norm": 0.22292175889015198, |
|
"learning_rate": 5.47945205479452e-07, |
|
"loss": 0.8982, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.9506849315068493, |
|
"grad_norm": 0.26683399081230164, |
|
"learning_rate": 5.205479452054795e-07, |
|
"loss": 0.9286, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.9534246575342465, |
|
"grad_norm": 0.22458477318286896, |
|
"learning_rate": 4.931506849315068e-07, |
|
"loss": 0.9217, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.9561643835616438, |
|
"grad_norm": 0.2236146479845047, |
|
"learning_rate": 4.657534246575343e-07, |
|
"loss": 0.9029, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.958904109589041, |
|
"grad_norm": 0.22380192577838898, |
|
"learning_rate": 4.383561643835617e-07, |
|
"loss": 0.8925, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.9616438356164384, |
|
"grad_norm": 0.2150353640317917, |
|
"learning_rate": 4.1095890410958903e-07, |
|
"loss": 0.9262, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.9643835616438357, |
|
"grad_norm": 0.22722327709197998, |
|
"learning_rate": 3.835616438356165e-07, |
|
"loss": 0.8768, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.9671232876712329, |
|
"grad_norm": 0.2135477215051651, |
|
"learning_rate": 3.561643835616439e-07, |
|
"loss": 0.8852, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.9698630136986301, |
|
"grad_norm": 0.21636591851711273, |
|
"learning_rate": 3.2876712328767123e-07, |
|
"loss": 0.879, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.9726027397260274, |
|
"grad_norm": 0.2424471527338028, |
|
"learning_rate": 3.013698630136987e-07, |
|
"loss": 0.9134, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.9753424657534246, |
|
"grad_norm": 0.2186809778213501, |
|
"learning_rate": 2.73972602739726e-07, |
|
"loss": 0.9169, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.9780821917808219, |
|
"grad_norm": 0.24734827876091003, |
|
"learning_rate": 2.465753424657534e-07, |
|
"loss": 0.9114, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.9808219178082191, |
|
"grad_norm": 0.23428578674793243, |
|
"learning_rate": 2.1917808219178084e-07, |
|
"loss": 0.9121, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.9835616438356164, |
|
"grad_norm": 0.24951869249343872, |
|
"learning_rate": 1.9178082191780824e-07, |
|
"loss": 0.9423, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.9863013698630136, |
|
"grad_norm": 0.24284477531909943, |
|
"learning_rate": 1.6438356164383561e-07, |
|
"loss": 0.8857, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.989041095890411, |
|
"grad_norm": 0.23787447810173035, |
|
"learning_rate": 1.36986301369863e-07, |
|
"loss": 0.8878, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.9917808219178083, |
|
"grad_norm": 0.2966347634792328, |
|
"learning_rate": 1.0958904109589042e-07, |
|
"loss": 0.9238, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.9945205479452055, |
|
"grad_norm": 0.21209554374217987, |
|
"learning_rate": 8.219178082191781e-08, |
|
"loss": 0.8724, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.9972602739726028, |
|
"grad_norm": 0.2518068253993988, |
|
"learning_rate": 5.479452054794521e-08, |
|
"loss": 0.9444, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.22552888095378876, |
|
"learning_rate": 2.7397260273972606e-08, |
|
"loss": 0.9042, |
|
"step": 365 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 365, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 0, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.1950741100198625e+18, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|