{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.60288,
  "eval_steps": 500,
  "global_step": 471,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {"epoch": 0.00128, "grad_norm": 2.8870697066158506, "learning_rate": 0.0, "loss": 0.8422, "step": 1},
    {"epoch": 0.00256, "grad_norm": 2.88484389829891, "learning_rate": 6.329113924050633e-07, "loss": 0.8541, "step": 2},
    {"epoch": 0.00384, "grad_norm": 2.858151965789657, "learning_rate": 1.2658227848101265e-06, "loss": 0.8376, "step": 3},
    {"epoch": 0.00512, "grad_norm": 2.759628117182127, "learning_rate": 1.8987341772151901e-06, "loss": 0.8334, "step": 4},
    {"epoch": 0.0064, "grad_norm": 2.796990062811218, "learning_rate": 2.531645569620253e-06, "loss": 0.8256, "step": 5},
    {"epoch": 0.00768, "grad_norm": 2.5779298795445023, "learning_rate": 3.1645569620253167e-06, "loss": 0.8301, "step": 6},
    {"epoch": 0.00896, "grad_norm": 2.182261607936066, "learning_rate": 3.7974683544303802e-06, "loss": 0.8156, "step": 7},
    {"epoch": 0.01024, "grad_norm": 1.9615896152651355, "learning_rate": 4.430379746835443e-06, "loss": 0.7982, "step": 8},
    {"epoch": 0.01152, "grad_norm": 1.452541644948315, "learning_rate": 5.063291139240506e-06, "loss": 0.7819, "step": 9},
    {"epoch": 0.0128, "grad_norm": 1.4723286808630864, "learning_rate": 5.69620253164557e-06, "loss": 0.7906, "step": 10},
    {"epoch": 0.01408, "grad_norm": 1.3529636617858944, "learning_rate": 6.329113924050633e-06, "loss": 0.7724, "step": 11},
    {"epoch": 0.01536, "grad_norm": 1.960737179905222, "learning_rate": 6.9620253164556965e-06, "loss": 0.7495, "step": 12},
    {"epoch": 0.01664, "grad_norm": 2.2349101055406337, "learning_rate": 7.5949367088607605e-06, "loss": 0.7581, "step": 13},
    {"epoch": 0.01792, "grad_norm": 2.0897577150322477, "learning_rate": 8.227848101265822e-06, "loss": 0.7404, "step": 14},
    {"epoch": 0.0192, "grad_norm": 1.8227218322635887, "learning_rate": 8.860759493670886e-06, "loss": 0.7382, "step": 15},
    {"epoch": 0.02048, "grad_norm": 1.2099951464458898, "learning_rate": 9.49367088607595e-06, "loss": 0.7231, "step": 16},
    {"epoch": 0.02176, "grad_norm": 1.2177037129914572, "learning_rate": 1.0126582278481012e-05, "loss": 0.7259, "step": 17},
    {"epoch": 0.02304, "grad_norm": 1.1031346132830708, "learning_rate": 1.0759493670886076e-05, "loss": 0.7059, "step": 18},
    {"epoch": 0.02432, "grad_norm": 0.9194779600801882, "learning_rate": 1.139240506329114e-05, "loss": 0.7137, "step": 19},
    {"epoch": 0.0256, "grad_norm": 0.8679468005972053, "learning_rate": 1.2025316455696203e-05, "loss": 0.7036, "step": 20},
    {"epoch": 0.02688, "grad_norm": 0.7227287276969042, "learning_rate": 1.2658227848101267e-05, "loss": 0.696, "step": 21},
    {"epoch": 0.02816, "grad_norm": 0.7425882516811844, "learning_rate": 1.3291139240506329e-05, "loss": 0.6888, "step": 22},
    {"epoch": 0.02944, "grad_norm": 0.7093793012252196, "learning_rate": 1.3924050632911393e-05, "loss": 0.6791, "step": 23},
    {"epoch": 0.03072, "grad_norm": 0.6018215463147907, "learning_rate": 1.4556962025316457e-05, "loss": 0.6783, "step": 24},
    {"epoch": 0.032, "grad_norm": 0.5846346732378257, "learning_rate": 1.5189873417721521e-05, "loss": 0.6811, "step": 25},
    {"epoch": 0.03328, "grad_norm": 0.5855419788452784, "learning_rate": 1.5822784810126583e-05, "loss": 0.683, "step": 26},
    {"epoch": 0.03456, "grad_norm": 0.5096689891724868, "learning_rate": 1.6455696202531644e-05, "loss": 0.6589, "step": 27},
    {"epoch": 0.03584, "grad_norm": 0.4871170504081146, "learning_rate": 1.7088607594936708e-05, "loss": 0.6582, "step": 28},
    {"epoch": 0.03712, "grad_norm": 0.4949600697144217, "learning_rate": 1.7721518987341772e-05, "loss": 0.669, "step": 29},
    {"epoch": 0.0384, "grad_norm": 0.5082926031630941, "learning_rate": 1.8354430379746836e-05, "loss": 0.666, "step": 30},
    {"epoch": 0.03968, "grad_norm": 0.49381475380567175, "learning_rate": 1.89873417721519e-05, "loss": 0.6556, "step": 31},
    {"epoch": 0.04096, "grad_norm": 0.4265624784331274, "learning_rate": 1.962025316455696e-05, "loss": 0.646, "step": 32},
    {"epoch": 0.04224, "grad_norm": 0.39190416547723717, "learning_rate": 2.0253164556962025e-05, "loss": 0.6473, "step": 33},
    {"epoch": 0.04352, "grad_norm": 0.4631353399929371, "learning_rate": 2.088607594936709e-05, "loss": 0.6441, "step": 34},
    {"epoch": 0.0448, "grad_norm": 0.3928335126997034, "learning_rate": 2.1518987341772153e-05, "loss": 0.6352, "step": 35},
    {"epoch": 0.04608, "grad_norm": 0.36295027582313966, "learning_rate": 2.2151898734177217e-05, "loss": 0.6333, "step": 36},
    {"epoch": 0.04736, "grad_norm": 0.35026852064181846, "learning_rate": 2.278481012658228e-05, "loss": 0.6399, "step": 37},
    {"epoch": 0.04864, "grad_norm": 0.39778614916835536, "learning_rate": 2.341772151898734e-05, "loss": 0.6298, "step": 38},
    {"epoch": 0.04992, "grad_norm": 0.33278348666417684, "learning_rate": 2.4050632911392405e-05, "loss": 0.6301, "step": 39},
    {"epoch": 0.0512, "grad_norm": 0.31444068712551376, "learning_rate": 2.468354430379747e-05, "loss": 0.6263, "step": 40},
    {"epoch": 0.05248, "grad_norm": 0.36059728676958264, "learning_rate": 2.5316455696202533e-05, "loss": 0.6458, "step": 41},
    {"epoch": 0.05376, "grad_norm": 0.3916144552301749, "learning_rate": 2.5949367088607597e-05, "loss": 0.6331, "step": 42},
    {"epoch": 0.05504, "grad_norm": 0.32338566356420756, "learning_rate": 2.6582278481012658e-05, "loss": 0.6332, "step": 43},
    {"epoch": 0.05632, "grad_norm": 0.33704233729853356, "learning_rate": 2.7215189873417722e-05, "loss": 0.6348, "step": 44},
    {"epoch": 0.0576, "grad_norm": 0.36015399213900634, "learning_rate": 2.7848101265822786e-05, "loss": 0.6392, "step": 45},
    {"epoch": 0.05888, "grad_norm": 0.31471331803021757, "learning_rate": 2.848101265822785e-05, "loss": 0.6272, "step": 46},
    {"epoch": 0.06016, "grad_norm": 0.3225170654156012, "learning_rate": 2.9113924050632914e-05, "loss": 0.635, "step": 47},
    {"epoch": 0.06144, "grad_norm": 0.3064473735810606, "learning_rate": 2.9746835443037974e-05, "loss": 0.6284, "step": 48},
    {"epoch": 0.06272, "grad_norm": 0.3038289969291092, "learning_rate": 3.0379746835443042e-05, "loss": 0.6149, "step": 49},
    {"epoch": 0.064, "grad_norm": 0.3226803690164346, "learning_rate": 3.10126582278481e-05, "loss": 0.626, "step": 50},
    {"epoch": 0.06528, "grad_norm": 0.3096398144524693, "learning_rate": 3.1645569620253167e-05, "loss": 0.621, "step": 51},
    {"epoch": 0.06656, "grad_norm": 0.2754757429130796, "learning_rate": 3.227848101265823e-05, "loss": 0.6185, "step": 52},
    {"epoch": 0.06784, "grad_norm": 0.3262507218160328, "learning_rate": 3.291139240506329e-05, "loss": 0.6171, "step": 53},
    {"epoch": 0.06912, "grad_norm": 0.34971068352090656, "learning_rate": 3.354430379746836e-05, "loss": 0.616, "step": 54},
    {"epoch": 0.0704, "grad_norm": 0.2841621281043231, "learning_rate": 3.4177215189873416e-05, "loss": 0.5995, "step": 55},
    {"epoch": 0.07168, "grad_norm": 0.4003223636484448, "learning_rate": 3.4810126582278487e-05, "loss": 0.6169, "step": 56},
    {"epoch": 0.07296, "grad_norm": 0.31868860231705426, "learning_rate": 3.5443037974683544e-05, "loss": 0.6077, "step": 57},
    {"epoch": 0.07424, "grad_norm": 0.3960425782005289, "learning_rate": 3.607594936708861e-05, "loss": 0.6164, "step": 58},
    {"epoch": 0.07552, "grad_norm": 0.363865574596696, "learning_rate": 3.670886075949367e-05, "loss": 0.6118, "step": 59},
    {"epoch": 0.0768, "grad_norm": 0.33961478774466697, "learning_rate": 3.7341772151898736e-05, "loss": 0.6137, "step": 60},
    {"epoch": 0.07808, "grad_norm": 0.4212164741206082, "learning_rate": 3.79746835443038e-05, "loss": 0.6275, "step": 61},
    {"epoch": 0.07936, "grad_norm": 0.29878729710395663, "learning_rate": 3.8607594936708864e-05, "loss": 0.6084, "step": 62},
    {"epoch": 0.08064, "grad_norm": 0.36745026817379894, "learning_rate": 3.924050632911392e-05, "loss": 0.607, "step": 63},
    {"epoch": 0.08192, "grad_norm": 0.38983571508393644, "learning_rate": 3.987341772151899e-05, "loss": 0.6176, "step": 64},
    {"epoch": 0.0832, "grad_norm": 0.37337392917475115, "learning_rate": 4.050632911392405e-05, "loss": 0.6184, "step": 65},
    {"epoch": 0.08448, "grad_norm": 0.3668068115925863, "learning_rate": 4.113924050632912e-05, "loss": 0.6194, "step": 66},
    {"epoch": 0.08576, "grad_norm": 0.36138503055306903, "learning_rate": 4.177215189873418e-05, "loss": 0.6077, "step": 67},
    {"epoch": 0.08704, "grad_norm": 0.43361127462043814, "learning_rate": 4.240506329113924e-05, "loss": 0.6147, "step": 68},
    {"epoch": 0.08832, "grad_norm": 0.33520423726109644, "learning_rate": 4.3037974683544305e-05, "loss": 0.6118, "step": 69},
    {"epoch": 0.0896, "grad_norm": 0.4381154362148859, "learning_rate": 4.367088607594937e-05, "loss": 0.6031, "step": 70},
    {"epoch": 0.09088, "grad_norm": 0.3717345864324632, "learning_rate": 4.430379746835443e-05, "loss": 0.6031, "step": 71},
    {"epoch": 0.09216, "grad_norm": 0.4861728465398392, "learning_rate": 4.49367088607595e-05, "loss": 0.6006, "step": 72},
    {"epoch": 0.09344, "grad_norm": 0.3264992939190504, "learning_rate": 4.556962025316456e-05, "loss": 0.6151, "step": 73},
    {"epoch": 0.09472, "grad_norm": 0.4319794925001871, "learning_rate": 4.6202531645569625e-05, "loss": 0.6058, "step": 74},
    {"epoch": 0.096, "grad_norm": 0.4616345840492333, "learning_rate": 4.683544303797468e-05, "loss": 0.5967, "step": 75},
    {"epoch": 0.09728, "grad_norm": 0.4405721152587957, "learning_rate": 4.7468354430379746e-05, "loss": 0.6002, "step": 76},
    {"epoch": 0.09856, "grad_norm": 0.5122605377853799, "learning_rate": 4.810126582278481e-05, "loss": 0.6076, "step": 77},
    {"epoch": 0.09984, "grad_norm": 0.45313870340097556, "learning_rate": 4.8734177215189874e-05, "loss": 0.6074, "step": 78},
    {"epoch": 0.10112, "grad_norm": 0.4340044755876676, "learning_rate": 4.936708860759494e-05, "loss": 0.606, "step": 79},
    {"epoch": 0.1024, "grad_norm": 0.4987172862476422, "learning_rate": 5e-05, "loss": 0.6158, "step": 80},
    {"epoch": 0.10368, "grad_norm": 0.6226880208665108, "learning_rate": 4.999974965737065e-05, "loss": 0.621, "step": 81},
    {"epoch": 0.10496, "grad_norm": 0.5448293131914782, "learning_rate": 4.999899863449631e-05, "loss": 0.6014, "step": 82},
    {"epoch": 0.10624, "grad_norm": 0.3427022601926917, "learning_rate": 4.999774694641803e-05, "loss": 0.6198, "step": 83},
    {"epoch": 0.10752, "grad_norm": 0.5005152113593655, "learning_rate": 4.999599461820387e-05, "loss": 0.6054, "step": 84},
    {"epoch": 0.1088, "grad_norm": 0.5702968806820528, "learning_rate": 4.999374168494844e-05, "loss": 0.6069, "step": 85},
    {"epoch": 0.11008, "grad_norm": 0.4671310661706222, "learning_rate": 4.999098819177214e-05, "loss": 0.6017, "step": 86},
    {"epoch": 0.11136, "grad_norm": 0.46081768174689064, "learning_rate": 4.9987734193820324e-05, "loss": 0.5988, "step": 87},
    {"epoch": 0.11264, "grad_norm": 0.5448729856183013, "learning_rate": 4.9983979756262136e-05, "loss": 0.6181, "step": 88},
    {"epoch": 0.11392, "grad_norm": 0.5095775592779056, "learning_rate": 4.9979724954289244e-05, "loss": 0.608, "step": 89},
    {"epoch": 0.1152, "grad_norm": 0.41119162739543413, "learning_rate": 4.997496987311431e-05, "loss": 0.5979, "step": 90},
    {"epoch": 0.11648, "grad_norm": 0.45501958535738946, "learning_rate": 4.996971460796929e-05, "loss": 0.6019, "step": 91},
    {"epoch": 0.11776, "grad_norm": 0.4287172104360816, "learning_rate": 4.9963959264103544e-05, "loss": 0.5955, "step": 92},
    {"epoch": 0.11904, "grad_norm": 0.409872269342458, "learning_rate": 4.995770395678171e-05, "loss": 0.5927, "step": 93},
    {"epoch": 0.12032, "grad_norm": 0.4304173966206036, "learning_rate": 4.995094881128138e-05, "loss": 0.5967, "step": 94},
    {"epoch": 0.1216, "grad_norm": 0.4229799776298517, "learning_rate": 4.994369396289063e-05, "loss": 0.6084, "step": 95},
    {"epoch": 0.12288, "grad_norm": 0.4509596954971553, "learning_rate": 4.9935939556905295e-05, "loss": 0.6134, "step": 96},
    {"epoch": 0.12416, "grad_norm": 0.467661146414229, "learning_rate": 4.992768574862603e-05, "loss": 0.5986, "step": 97},
    {"epoch": 0.12544, "grad_norm": 0.42432875998240194, "learning_rate": 4.9918932703355256e-05, "loss": 0.6028, "step": 98},
    {"epoch": 0.12672, "grad_norm": 0.43479377184835605, "learning_rate": 4.990968059639379e-05, "loss": 0.5942, "step": 99},
    {"epoch": 0.128, "grad_norm": 0.3680676685801686, "learning_rate": 4.989992961303738e-05, "loss": 0.5994, "step": 100},
    {"epoch": 0.12928, "grad_norm": 0.3956815409903461, "learning_rate": 4.9889679948572974e-05, "loss": 0.5871, "step": 101},
    {"epoch": 0.13056, "grad_norm": 0.34354949934586104, "learning_rate": 4.98789318082748e-05, "loss": 0.5873, "step": 102},
    {"epoch": 0.13184, "grad_norm": 0.3608260963951222, "learning_rate": 4.986768540740028e-05, "loss": 0.5883, "step": 103},
    {"epoch": 0.13312, "grad_norm": 0.3937004101078116, "learning_rate": 4.98559409711857e-05, "loss": 0.6029, "step": 104},
    {"epoch": 0.1344, "grad_norm": 0.3401718481532899, "learning_rate": 4.9843698734841705e-05, "loss": 0.5983, "step": 105},
    {"epoch": 0.13568, "grad_norm": 0.4371868869288284, "learning_rate": 4.983095894354858e-05, "loss": 0.5866, "step": 106},
    {"epoch": 0.13696, "grad_norm": 0.3722813571279646, "learning_rate": 4.981772185245135e-05, "loss": 0.5954, "step": 107},
    {"epoch": 0.13824, "grad_norm": 0.36493596395606354, "learning_rate": 4.980398772665468e-05, "loss": 0.5806, "step": 108},
    {"epoch": 0.13952, "grad_norm": 0.43678937522389644, "learning_rate": 4.9789756841217546e-05, "loss": 0.595, "step": 109},
    {"epoch": 0.1408, "grad_norm": 0.34968596729530604, "learning_rate": 4.977502948114772e-05, "loss": 0.5999, "step": 110},
    {"epoch": 0.14208, "grad_norm": 0.4035249077012057, "learning_rate": 4.9759805941396075e-05, "loss": 0.582, "step": 111},
    {"epoch": 0.14336, "grad_norm": 0.3396387531525401, "learning_rate": 4.974408652685072e-05, "loss": 0.5912, "step": 112},
    {"epoch": 0.14464, "grad_norm": 0.3888124435581031, "learning_rate": 4.9727871552330794e-05, "loss": 0.5994, "step": 113},
    {"epoch": 0.14592, "grad_norm": 0.3487289265208422, "learning_rate": 4.971116134258025e-05, "loss": 0.598, "step": 114},
    {"epoch": 0.1472, "grad_norm": 0.34084258932596606, "learning_rate": 4.969395623226133e-05, "loss": 0.5965, "step": 115},
    {"epoch": 0.14848, "grad_norm": 0.33211872605390524, "learning_rate": 4.967625656594782e-05, "loss": 0.5984, "step": 116},
    {"epoch": 0.14976, "grad_norm": 0.31055192632357626, "learning_rate": 4.9658062698118213e-05, "loss": 0.593, "step": 117},
    {"epoch": 0.15104, "grad_norm": 0.35790400007793166, "learning_rate": 4.963937499314857e-05, "loss": 0.6035, "step": 118},
    {"epoch": 0.15232, "grad_norm": 0.31118450185510343, "learning_rate": 4.962019382530521e-05, "loss": 0.5811, "step": 119},
    {"epoch": 0.1536, "grad_norm": 0.3326176465041298, "learning_rate": 4.960051957873725e-05, "loss": 0.581, "step": 120},
    {"epoch": 0.15488, "grad_norm": 0.30210249377153575, "learning_rate": 4.958035264746893e-05, "loss": 0.5837, "step": 121},
    {"epoch": 0.15616, "grad_norm": 0.3480385124671555, "learning_rate": 4.955969343539162e-05, "loss": 0.5768, "step": 122},
    {"epoch": 0.15744, "grad_norm": 0.3003392569743352, "learning_rate": 4.9538542356255866e-05, "loss": 0.5938, "step": 123},
    {"epoch": 0.15872, "grad_norm": 0.32082565179488104, "learning_rate": 4.9516899833663e-05, "loss": 0.5948, "step": 124},
    {"epoch": 0.16, "grad_norm": 0.3564349708048278, "learning_rate": 4.949476630105669e-05, "loss": 0.595, "step": 125},
    {"epoch": 0.16128, "grad_norm": 0.32049541972124757, "learning_rate": 4.94721422017143e-05, "loss": 0.5838, "step": 126},
    {"epoch": 0.16256, "grad_norm": 0.3317680882353993, "learning_rate": 4.944902798873794e-05, "loss": 0.5952, "step": 127},
    {"epoch": 0.16384, "grad_norm": 0.3381465061198974, "learning_rate": 4.942542412504543e-05, "loss": 0.6004, "step": 128},
    {"epoch": 0.16512, "grad_norm": 0.38351657127693595, "learning_rate": 4.940133108336105e-05, "loss": 0.6014, "step": 129},
    {"epoch": 0.1664, "grad_norm": 0.3276142738951724, "learning_rate": 4.9376749346206006e-05, "loss": 0.5853, "step": 130},
    {"epoch": 0.16768, "grad_norm": 0.37146400882939534, "learning_rate": 4.935167940588887e-05, "loss": 0.5995, "step": 131},
    {"epoch": 0.16896, "grad_norm": 0.32804274509201087, "learning_rate": 4.9326121764495596e-05, "loss": 0.5955, "step": 132},
    {"epoch": 0.17024, "grad_norm": 0.3344845806030499, "learning_rate": 4.9300076933879574e-05, "loss": 0.5818, "step": 133},
    {"epoch": 0.17152, "grad_norm": 0.3479572078392269, "learning_rate": 4.92735454356513e-05, "loss": 0.5941, "step": 134},
    {"epoch": 0.1728, "grad_norm": 0.34868252062960353, "learning_rate": 4.924652780116799e-05, "loss": 0.5898, "step": 135},
    {"epoch": 0.17408, "grad_norm": 0.35674279058993497, "learning_rate": 4.921902457152289e-05, "loss": 0.5899, "step": 136},
    {"epoch": 0.17536, "grad_norm": 0.3672614416380493, "learning_rate": 4.9191036297534454e-05, "loss": 0.585, "step": 137},
    {"epoch": 0.17664, "grad_norm": 0.4039478601084677, "learning_rate": 4.916256353973535e-05, "loss": 0.5994, "step": 138},
    {"epoch": 0.17792, "grad_norm": 0.3428958061155067, "learning_rate": 4.913360686836117e-05, "loss": 0.575, "step": 139},
    {"epoch": 0.1792, "grad_norm": 0.4024960602256603, "learning_rate": 4.910416686333906e-05, "loss": 0.5913, "step": 140},
    {"epoch": 0.18048, "grad_norm": 0.31040065034832104, "learning_rate": 4.907424411427608e-05, "loss": 0.5761, "step": 141},
    {"epoch": 0.18176, "grad_norm": 0.359237099401051, "learning_rate": 4.90438392204474e-05, "loss": 0.5885, "step": 142},
    {"epoch": 0.18304, "grad_norm": 0.3357545415879296, "learning_rate": 4.901295279078431e-05, "loss": 0.5907, "step": 143},
    {"epoch": 0.18432, "grad_norm": 0.2846403022642179, "learning_rate": 4.898158544386201e-05, "loss": 0.5886, "step": 144},
    {"epoch": 0.1856, "grad_norm": 0.3636245125193307, "learning_rate": 4.894973780788722e-05, "loss": 0.5816, "step": 145},
    {"epoch": 0.18688, "grad_norm": 0.25440894793562924, "learning_rate": 4.8917410520685635e-05, "loss": 0.576, "step": 146},
    {"epoch": 0.18816, "grad_norm": 0.3380189678855273, "learning_rate": 4.888460422968908e-05, "loss": 0.5931, "step": 147},
    {"epoch": 0.18944, "grad_norm": 0.3096794617975588, "learning_rate": 4.885131959192262e-05, "loss": 0.5829, "step": 148},
    {"epoch": 0.19072, "grad_norm": 0.280174710159943, "learning_rate": 4.881755727399134e-05, "loss": 0.5794, "step": 149},
    {"epoch": 0.192, "grad_norm": 0.31769340776297994, "learning_rate": 4.878331795206705e-05, "loss": 0.5729, "step": 150},
    {"epoch": 0.19328, "grad_norm": 0.31671973855902796, "learning_rate": 4.8748602311874694e-05, "loss": 0.5905, "step": 151},
    {"epoch": 0.19456, "grad_norm": 0.32614211009906474, "learning_rate": 4.8713411048678635e-05, "loss": 0.5855, "step": 152},
    {"epoch": 0.19584, "grad_norm": 0.29921149443441614, "learning_rate": 4.8677744867268764e-05, "loss": 0.5779, "step": 153},
    {"epoch": 0.19712, "grad_norm": 0.3558339409344647, "learning_rate": 4.8641604481946314e-05, "loss": 0.5892, "step": 154},
    {"epoch": 0.1984, "grad_norm": 0.285079025062, "learning_rate": 4.8604990616509616e-05, "loss": 0.5912, "step": 155},
    {"epoch": 0.19968, "grad_norm": 0.32189736402098207, "learning_rate": 4.856790400423958e-05, "loss": 0.5881, "step": 156},
    {"epoch": 0.20096, "grad_norm": 0.3293153125716864, "learning_rate": 4.8530345387885004e-05, "loss": 0.5679, "step": 157},
    {"epoch": 0.20224, "grad_norm": 0.3020107126026594, "learning_rate": 4.849231551964771e-05, "loss": 0.5892, "step": 158},
    {"epoch": 0.20352, "grad_norm": 0.37859431920215386, "learning_rate": 4.845381516116748e-05, "loss": 0.5752, "step": 159},
    {"epoch": 0.2048, "grad_norm": 0.3310159661704287, "learning_rate": 4.841484508350679e-05, "loss": 0.5746, "step": 160},
    {"epoch": 0.20608, "grad_norm": 0.29255683283621947, "learning_rate": 4.837540606713538e-05, "loss": 0.5822, "step": 161},
    {"epoch": 0.20736, "grad_norm": 0.35293526712063267, "learning_rate": 4.83354989019146e-05, "loss": 0.5872, "step": 162},
    {"epoch": 0.20864, "grad_norm": 0.31406471157688987, "learning_rate": 4.829512438708163e-05, "loss": 0.5837, "step": 163},
    {"epoch": 0.20992, "grad_norm": 0.3746042872826804, "learning_rate": 4.8254283331233464e-05, "loss": 0.5795, "step": 164},
    {"epoch": 0.2112, "grad_norm": 0.36631879902112746, "learning_rate": 4.821297655231066e-05, "loss": 0.5917, "step": 165},
    {"epoch": 0.21248, "grad_norm": 0.31527709590813463, "learning_rate": 4.817120487758104e-05, "loss": 0.583, "step": 166},
    {"epoch": 0.21376, "grad_norm": 0.3311399262333245, "learning_rate": 4.812896914362309e-05, "loss": 0.5806, "step": 167},
    {"epoch": 0.21504, "grad_norm": 0.2797000752887155, "learning_rate": 4.808627019630917e-05, "loss": 0.5818, "step": 168},
    {"epoch": 0.21632, "grad_norm": 0.34099518073149715, "learning_rate": 4.804310889078861e-05, "loss": 0.5827, "step": 169},
    {"epoch": 0.2176, "grad_norm": 0.2858567724381781, "learning_rate": 4.799948609147061e-05, "loss": 0.574, "step": 170},
    {"epoch": 0.21888, "grad_norm": 0.3714847802025429, "learning_rate": 4.7955402672006854e-05, "loss": 0.568, "step": 171},
    {"epoch": 0.22016, "grad_norm": 0.3629775316062746, "learning_rate": 4.791085951527408e-05, "loss": 0.5908, "step": 172},
    {"epoch": 0.22144, "grad_norm": 0.3308553993376432, "learning_rate": 4.786585751335637e-05, "loss": 0.5822, "step": 173},
    {"epoch": 0.22272, "grad_norm": 0.3583796747287126, "learning_rate": 4.782039756752727e-05, "loss": 0.5849, "step": 174},
    {"epoch": 0.224, "grad_norm": 0.3200425393735649, "learning_rate": 4.777448058823179e-05, "loss": 0.5975, "step": 175},
    {"epoch": 0.22528, "grad_norm": 0.292626050602806, "learning_rate": 4.77281074950681e-05, "loss": 0.5858, "step": 176},
    {"epoch": 0.22656, "grad_norm": 0.35199899958158143, "learning_rate": 4.768127921676916e-05, "loss": 0.5956, "step": 177},
    {"epoch": 0.22784, "grad_norm": 0.3049078150389153, "learning_rate": 4.763399669118414e-05, "loss": 0.5818, "step": 178},
    {"epoch": 0.22912, "grad_norm": 0.3171759989327936, "learning_rate": 4.758626086525956e-05, "loss": 0.5788, "step": 179},
    {"epoch": 0.2304, "grad_norm": 0.37616755806228974, "learning_rate": 4.753807269502041e-05, "loss": 0.5764, "step": 180},
    {"epoch": 0.23168, "grad_norm": 0.3111683206800814, "learning_rate": 4.748943314555093e-05, "loss": 0.5904, "step": 181},
    {"epoch": 0.23296, "grad_norm": 0.33324357602957505, "learning_rate": 4.744034319097535e-05, "loss": 0.5837, "step": 182},
    {"epoch": 0.23424, "grad_norm": 0.3033759415187109, "learning_rate": 4.739080381443834e-05, "loss": 0.5872, "step": 183},
    {"epoch": 0.23552, "grad_norm": 0.31221903432674414, "learning_rate": 4.734081600808531e-05, "loss": 0.5788, "step": 184},
    {"epoch": 0.2368, "grad_norm": 0.3521014284242888, "learning_rate": 4.7290380773042575e-05, "loss": 0.5876, "step": 185},
    {"epoch": 0.23808, "grad_norm": 0.310657917751192, "learning_rate": 4.723949911939728e-05, "loss": 0.5799, "step": 186},
    {"epoch": 0.23936, "grad_norm": 0.3571637318567018, "learning_rate": 4.718817206617718e-05, "loss": 0.5768, "step": 187},
    {"epoch": 0.24064, "grad_norm": 0.3019122458668047, "learning_rate": 4.713640064133025e-05, "loss": 0.569, "step": 188},
    {"epoch": 0.24192, "grad_norm": 0.3867595871931151, "learning_rate": 4.7084185881704037e-05, "loss": 0.5807, "step": 189},
    {"epoch": 0.2432, "grad_norm": 0.31887858233709804, "learning_rate": 4.7031528833024976e-05, "loss": 0.5853, "step": 190},
    {"epoch": 0.24448, "grad_norm": 0.33241965371410676, "learning_rate": 4.697843054987737e-05, "loss": 0.5843, "step": 191},
    {"epoch": 0.24576, "grad_norm": 0.3186271240585135, "learning_rate": 4.692489209568234e-05, "loss": 0.58, "step": 192},
    {"epoch": 0.24704, "grad_norm": 0.27257060138401595, "learning_rate": 4.687091454267646e-05, "loss": 0.5782, "step": 193},
    {"epoch": 0.24832, "grad_norm": 0.3068678257607851, "learning_rate": 4.681649897189036e-05, "loss": 0.5715, "step": 194},
    {"epoch": 0.2496, "grad_norm": 0.2895553434845913, "learning_rate": 4.6761646473126985e-05, "loss": 0.5864, "step": 195},
    {"epoch": 0.25088, "grad_norm": 0.3320177973067205, "learning_rate": 4.670635814493984e-05, "loss": 0.5895, "step": 196},
    {"epoch": 0.25216, "grad_norm": 0.28847585573804785, "learning_rate": 4.665063509461097e-05, "loss": 0.5906, "step": 197},
    {"epoch": 0.25344, "grad_norm": 0.36087996937741346, "learning_rate": 4.6594478438128757e-05, "loss": 0.5827, "step": 198},
    {"epoch": 0.25472, "grad_norm": 0.2999930376743691, "learning_rate": 4.653788930016562e-05, "loss": 0.5661, "step": 199},
    {"epoch": 0.256, "grad_norm": 0.3488298254178776, "learning_rate": 4.6480868814055424e-05, "loss": 0.5793, "step": 200},
    {"epoch": 0.25728, "grad_norm": 0.3489687054350795, "learning_rate": 4.6423418121770855e-05, "loss": 0.5804, "step": 201},
    {"epoch": 0.25856, "grad_norm": 0.32040065196445794, "learning_rate": 4.636553837390051e-05, "loss": 0.5885, "step": 202},
    {"epoch": 0.25984, "grad_norm": 0.3211301436932052, "learning_rate": 4.630723072962584e-05, "loss": 0.5712, "step": 203},
    {"epoch": 0.26112, "grad_norm": 0.359318175552222, "learning_rate": 4.6248496356697966e-05, "loss": 0.5708, "step": 204},
    {"epoch": 0.2624, "grad_norm": 0.28862834349726524, "learning_rate": 4.618933643141428e-05, "loss": 0.5761, "step": 205},
    {"epoch": 0.26368, "grad_norm": 0.37248048616519797, "learning_rate": 4.6129752138594874e-05, "loss": 0.5853, "step": 206},
    {"epoch": 0.26496, "grad_norm": 0.26505735554799875, "learning_rate": 4.6069744671558835e-05, "loss": 0.5711, "step": 207},
    {"epoch": 0.26624, "grad_norm": 0.4313554228926337, "learning_rate": 4.6009315232100324e-05, "loss": 0.5805, "step": 208},
    {"epoch": 0.26752, "grad_norm": 0.25367772845462405, "learning_rate": 4.5948465030464536e-05, "loss": 0.5804, "step": 209},
    {"epoch": 0.2688, "grad_norm": 0.4241960582103298, "learning_rate": 4.588719528532342e-05, "loss": 0.5781, "step": 210},
    {"epoch": 0.27008, "grad_norm": 0.26959374978777695, "learning_rate": 4.58255072237513e-05, "loss": 0.5872, "step": 211},
    {"epoch": 0.27136, "grad_norm": 0.32686893212462864, "learning_rate": 4.5763402081200294e-05, "loss": 0.5865, "step": 212},
    {"epoch": 0.27264, "grad_norm": 0.23434165675561655, "learning_rate": 4.570088110147559e-05, "loss": 0.5605, "step": 213},
    {"epoch": 0.27392, "grad_norm": 0.29221024109189686, "learning_rate": 4.56379455367105e-05, "loss": 0.5798, "step": 214},
    {"epoch": 0.2752, "grad_norm": 0.24948843663318818, "learning_rate": 4.557459664734141e-05, "loss": 0.5647, "step": 215},
    {"epoch": 0.27648, "grad_norm": 0.2697506211061829, "learning_rate": 4.551083570208252e-05, "loss": 0.574, "step": 216},
    {"epoch": 0.27776, "grad_norm": 0.26982678855787623, "learning_rate": 4.544666397790043e-05, "loss": 0.5769, "step": 217},
    {"epoch": 0.27904, "grad_norm": 0.26312816652400706, "learning_rate": 4.538208275998861e-05, "loss": 0.5794, "step": 218},
    {"epoch": 0.28032, "grad_norm": 0.23961265146671684, "learning_rate": 4.531709334174161e-05, "loss": 0.5713, "step": 219},
    {"epoch": 0.2816, "grad_norm": 0.2521656675623654, "learning_rate": 4.5251697024729165e-05, "loss": 0.5799, "step": 220},
    {"epoch": 0.28288, "grad_norm": 0.2554557690482637, "learning_rate": 4.518589511867017e-05, "loss": 0.5612, "step": 221},
    {"epoch": 0.28416, "grad_norm": 0.2575994504688328, "learning_rate": 4.511968894140639e-05, "loss": 0.5646, "step": 222},
    {"epoch": 0.28544, "grad_norm": 0.24148588167926954, "learning_rate": 4.50530798188761e-05, "loss": 0.5752, "step": 223},
    {"epoch": 0.28672, "grad_norm": 0.24795970441926052, "learning_rate": 4.498606908508754e-05, "loss": 0.5651, "step": 224},
    {"epoch": 0.288, "grad_norm": 0.2594616590711217, "learning_rate": 4.491865808209215e-05, "loss": 0.5677, "step": 225},
    {"epoch": 0.28928, "grad_norm": 0.25674541102342535, "learning_rate": 4.485084815995778e-05, "loss": 0.5612, "step": 226},
    {"epoch": 0.29056, "grad_norm": 0.24985240413836018, "learning_rate": 4.478264067674155e-05, "loss": 0.5699, "step": 227},
    {"epoch": 0.29184, "grad_norm": 0.2565786888027258, "learning_rate": 4.471403699846272e-05, "loss": 0.5649, "step": 228},
    {"epoch": 0.29312, "grad_norm": 0.24372178698061564, "learning_rate": 4.4645038499075296e-05, "loss": 0.5727, "step": 229},
    {"epoch": 0.2944, "grad_norm": 0.26957885657754116, "learning_rate": 4.457564656044056e-05, "loss": 0.5673, "step": 230},
    {"epoch": 0.29568, "grad_norm": 0.2607171265747611, "learning_rate": 4.4505862572299315e-05, "loss": 0.5745, "step": 231},
    {"epoch": 0.29696, "grad_norm": 0.24863876648291447, "learning_rate": 4.443568793224415e-05, "loss": 0.5681, "step": 232},
    {"epoch": 0.29824, "grad_norm": 0.263062932590105, "learning_rate": 4.436512404569136e-05, "loss": 0.5644, "step": 233},
    {"epoch": 0.29952, "grad_norm": 0.2414635376635183, "learning_rate": 4.429417232585288e-05, "loss": 0.5701, "step": 234},
    {"epoch": 0.3008, "grad_norm": 0.25798244731348213, "learning_rate": 4.422283419370789e-05, "loss": 0.5749, "step": 235},
    {"epoch": 0.30208, "grad_norm": 0.2705236848186794, "learning_rate": 4.415111107797445e-05, "loss": 0.5722, "step": 236},
    {"epoch": 0.30336, "grad_norm": 0.2532310162226822, "learning_rate": 4.407900441508084e-05, "loss": 0.5613, "step": 237},
    {"epoch": 0.30464, "grad_norm": 0.27015507001673206, "learning_rate": 4.400651564913676e-05, "loss": 0.5764, "step": 238},
    {"epoch": 0.30592, "grad_norm": 0.2600356410106743, "learning_rate": 4.3933646231904504e-05, "loss": 0.5621, "step": 239},
    {"epoch": 0.3072, "grad_norm": 0.2947154143151425, "learning_rate": 4.3860397622769756e-05, "loss": 0.5776, "step": 240},
    {"epoch": 0.30848, "grad_norm": 0.278006374370773, "learning_rate": 4.37867712887125e-05, "loss": 0.5779, "step": 241},
    {"epoch": 0.30976, "grad_norm": 0.2737057808752138, "learning_rate": 4.371276870427753e-05, "loss": 0.5691, "step": 242},
    {"epoch": 0.31104, "grad_norm": 0.2779579791922396, "learning_rate": 4.363839135154497e-05, "loss": 0.5561, "step": 243},
    {"epoch": 0.31232, "grad_norm": 0.2900125824723869, "learning_rate": 4.356364072010059e-05, "loss": 0.5862, "step": 244},
    {"epoch": 0.3136, "grad_norm": 0.2864520071161508, "learning_rate": 4.348851830700593e-05, "loss": 0.5622, "step": 245},
    {"epoch": 0.31488, "grad_norm": 0.26378798903912654, "learning_rate": 4.3413025616768424e-05, "loss": 0.5709, "step": 246},
    {"epoch": 0.31616, "grad_norm": 0.3060187732302713, "learning_rate": 4.333716416131115e-05, "loss": 0.5669, "step": 247},
    {"epoch": 0.31744, "grad_norm": 0.29432171614697805, "learning_rate": 4.3260935459942584e-05, "loss": 0.5682, "step": 248},
    {"epoch": 0.31872, "grad_norm": 0.26017905775079875, "learning_rate": 4.318434103932622e-05, "loss": 0.5885, "step": 249},
    {"epoch": 0.32, "grad_norm": 0.31505146670137313, "learning_rate": 4.310738243344996e-05, "loss": 0.5761, "step": 250},
    {"epoch": 0.32128, "grad_norm": 0.28372810809515076, "learning_rate": 4.303006118359537e-05, "loss": 0.5592, "step": 251},
    {"epoch": 0.32256, "grad_norm": 0.2651991449577939, "learning_rate": 4.295237883830685e-05, "loss": 0.5697, "step": 252},
    {"epoch": 0.32384, "grad_norm": 0.24913000845932673, "learning_rate": 4.2874336953360616e-05, "loss": 0.5776, "step": 253},
    {"epoch": 0.32512, "grad_norm": 0.2430027683011546, "learning_rate": 4.2795937091733515e-05, "loss": 0.5677, "step": 254},
    {"epoch": 0.3264, "grad_norm": 0.2568626552616791, "learning_rate": 4.271718082357175e-05, "loss": 0.582, "step": 255},
    {"epoch": 0.32768, "grad_norm": 0.23741828351399039, "learning_rate": 4.2638069726159424e-05, "loss": 0.5609, "step": 256},
    {"epoch": 0.32896, "grad_norm": 0.23355776394833105, "learning_rate": 4.255860538388694e-05, "loss": 0.572, "step": 257},
    {"epoch": 0.33024, "grad_norm": 0.28500006430456526, "learning_rate": 4.247878938821929e-05, "loss": 0.5783, "step": 258},
    {"epoch": 0.33152, "grad_norm": 0.23832612238075213, "learning_rate": 4.2398623337664176e-05, "loss": 0.5591, "step": 259},
    {"epoch": 0.3328, "grad_norm": 0.3027829669410571, "learning_rate": 4.231810883773999e-05, "loss": 0.5723, "step": 260},
    {"epoch": 0.33408, "grad_norm": 0.24742812416416485, "learning_rate": 4.223724750094366e-05, "loss": 0.562, "step": 261},
    {"epoch": 0.33536, "grad_norm": 0.281426189909417, "learning_rate": 4.215604094671835e-05, "loss": 0.5711, "step": 262},
    {"epoch": 0.33664, "grad_norm": 0.3464883424573956, "learning_rate": 4.207449080142104e-05, "loss": 0.56, "step": 263},
    {"epoch": 0.33792, "grad_norm": 0.23536130124235047, "learning_rate": 4.199259869828998e-05, "loss": 0.5714, "step": 264},
    {"epoch": 0.3392, "grad_norm": 0.32901239458674364, "learning_rate": 4.191036627741191e-05, "loss": 0.5656, "step": 265},
    {"epoch": 0.34048, "grad_norm": 0.2513197034442368, "learning_rate": 4.182779518568926e-05, "loss": 0.5549, "step": 266},
    {"epoch": 0.34176, "grad_norm": 0.2956698427587275, "learning_rate": 4.174488707680717e-05, "loss": 0.565, "step": 267},
    {"epoch": 0.34304, "grad_norm": 0.2767183631207283, "learning_rate": 4.1661643611200366e-05, "loss": 0.5751, "step": 268},
    {"epoch": 0.34432, "grad_norm": 0.24523313623813994, "learning_rate": 4.157806645601988e-05, "loss": 0.5742, "step": 269},
    {"epoch": 0.3456, "grad_norm": 0.2908460538863046, "learning_rate": 4.149415728509971e-05, "loss": 0.5593, "step": 270},
    {"epoch": 0.34688, "grad_norm": 0.4424923362280487, "learning_rate": 4.140991777892324e-05, "loss": 0.5814, "step": 271},
    {"epoch": 0.34816, "grad_norm": 0.2575686169084144, "learning_rate": 4.132534962458962e-05, "loss": 0.5678, "step": 272},
    {"epoch": 0.34944, "grad_norm": 0.2629805233702073, "learning_rate": 4.124045451578001e-05, "loss": 0.5629, "step": 273},
    {"epoch": 0.35072, "grad_norm": 0.22898476571255702, "learning_rate": 4.115523415272358e-05, "loss": 0.569, "step": 274},
    {"epoch": 0.352, "grad_norm": 0.2443223155892946, "learning_rate": 4.1069690242163484e-05, "loss": 0.5624, "step": 275},
    {"epoch": 0.35328, "grad_norm": 0.24146954716830435, "learning_rate": 4.0983824497322755e-05, "loss": 0.5783, "step": 276},
    {"epoch": 0.35456, "grad_norm": 0.23532353314194146, "learning_rate": 4.0897638637869874e-05, "loss": 0.5846, "step": 277},
    {"epoch": 0.35584, "grad_norm": 0.21666728547483338, "learning_rate": 4.0811134389884433e-05, "loss": 0.5788, "step": 278},
    {"epoch": 0.35712, "grad_norm": 0.2296647805474214, "learning_rate": 4.07243134858225e-05, "loss": 0.5679, "step": 279},
    {"epoch": 0.3584, "grad_norm": 0.23340342966367908, "learning_rate": 4.063717766448194e-05, "loss": 0.5645, "step": 280},
    {"epoch": 0.35968, "grad_norm": 0.23757239204061076, "learning_rate": 4.05497286709676e-05, "loss": 0.5745, "step": 281},
    {"epoch": 0.36096, "grad_norm": 0.24284616004111326, "learning_rate": 4.0461968256656376e-05, "loss": 0.5659, "step": 282},
    {"epoch": 0.36224, "grad_norm": 0.2317609526061923, "learning_rate": 4.037389817916208e-05, "loss": 0.5657, "step": 283},
    {"epoch": 0.36352, "grad_norm": 0.2779219267379158, "learning_rate": 4.028552020230031e-05, "loss": 0.5612, "step": 284},
    {"epoch": 0.3648, "grad_norm": 0.2568685752720875, "learning_rate": 4.019683609605305e-05, "loss": 0.5618, "step": 285},
    {"epoch": 0.36608, "grad_norm": 0.24357977645804754, "learning_rate": 4.010784763653331e-05, "loss": 0.5532, "step": 286},
    {"epoch": 0.36736, "grad_norm": 0.2854087705449231, "learning_rate": 4.001855660594948e-05, "loss": 0.5598, "step": 287},
    {"epoch": 0.36864, "grad_norm": 0.27153778848296095, "learning_rate": 3.9928964792569655e-05, "loss": 0.5661, "step": 288},
    {"epoch": 0.36992, "grad_norm": 0.3013570035330345, "learning_rate": 3.983907399068587e-05, "loss": 0.576, "step": 289},
    {"epoch": 0.3712, "grad_norm": 0.23579225525517492, "learning_rate": 3.974888600057808e-05, "loss": 0.5685, "step": 290},
    {"epoch": 0.37248, "grad_norm": 0.29344672148557993, "learning_rate": 3.965840262847818e-05, "loss": 0.5652, "step": 291},
    {"epoch": 0.37376, "grad_norm": 0.2771114973156099, "learning_rate": 3.956762568653378e-05, "loss": 0.5615, "step": 292},
    {"epoch": 0.37504, "grad_norm": 0.2475727777258234, "learning_rate": 3.947655699277197e-05, "loss": 0.5622, "step": 293},
    {"epoch": 0.37632, "grad_norm": 0.3283313482469374, "learning_rate": 3.9385198371062845e-05, "loss": 0.5688, "step": 294},
    {"epoch": 0.3776, "grad_norm": 0.2567128249078116, "learning_rate": 3.929355165108299e-05, "loss": 0.5685, "step": 295},
    {"epoch": 0.37888, "grad_norm": 0.33868790476694477, "learning_rate": 3.920161866827889e-05, "loss": 0.5603, "step": 296},
    {"epoch": 0.38016, "grad_norm": 0.23159577247265578, "learning_rate": 3.910940126383013e-05, "loss": 0.5614, "step": 297},
    {"epoch": 0.38144, "grad_norm": 0.261651084842406, "learning_rate": 3.9016901284612474e-05, "loss": 0.5634, "step": 298},
    {"epoch": 0.38272, "grad_norm": 0.25918417083311507, "learning_rate": 3.8924120583160985e-05, "loss": 0.564, "step": 299},
    {"epoch": 0.384, "grad_norm": 0.24530554345595257, "learning_rate": 3.883106101763285e-05, "loss": 0.5751, "step": 300},
    {"epoch": 0.38528, "grad_norm": 0.25269762220161, "learning_rate": 3.873772445177015e-05, "loss": 0.5666, "step": 301},
    {"epoch": 0.38656, "grad_norm": 0.2686509936643206, "learning_rate": 3.8644112754862614e-05, "loss": 0.5787, "step": 302},
    {"epoch": 0.38784, "grad_norm": 0.2306788708136305, "learning_rate": 3.85502278017101e-05, "loss": 0.5639, "step": 303},
    {"epoch": 0.38912, "grad_norm": 0.25921338779127046, "learning_rate": 3.84560714725851e-05, "loss": 0.5611, "step": 304},
    {"epoch": 0.3904, "grad_norm": 0.2875441041443726, "learning_rate": 3.8361645653195026e-05, "loss": 0.5641, "step": 305},
    {"epoch": 0.39168, "grad_norm": 0.24679004673798802, "learning_rate": 3.8266952234644545e-05, "loss": 0.5696, "step": 306},
    {"epoch": 0.39296, "grad_norm": 0.32137055545965876, "learning_rate": 3.817199311339759e-05, "loss": 0.5642, "step": 307},
    {"epoch": 0.39424, "grad_norm": 0.25511061251478745, "learning_rate": 3.807677019123944e-05, "loss": 0.5535, "step": 308},
    {"epoch": 0.39552, "grad_norm": 0.28918335928655264, "learning_rate": 3.798128537523865e-05, "loss": 0.5628, "step": 309},
    {"epoch": 0.3968, "grad_norm": 0.24654149918381577, "learning_rate": 3.7885540577708804e-05, "loss": 0.5693, "step": 310},
    {"epoch": 0.39808, "grad_norm": 0.29445546859209903, "learning_rate": 3.7789537716170256e-05, "loss": 0.5603, "step": 311},
    {"epoch": 0.39936, "grad_norm": 0.2546352881022481, "learning_rate": 3.76932787133117e-05, "loss": 0.5605, "step": 312},
    {"epoch": 0.40064, "grad_norm": 0.2694209869990075, "learning_rate": 3.759676549695168e-05, "loss": 0.5631, "step": 313},
    {"epoch": 0.40192, "grad_norm": 0.25374147038521105, "learning_rate": 3.7500000000000003e-05, "loss": 0.5737, "step": 314},
    {"epoch": 0.4032, "grad_norm": 0.2692940305682822, "learning_rate": 3.740298416041898e-05, "loss": 0.5592, "step": 315},
    {"epoch": 0.40448, "grad_norm": 0.27292894144562746, "learning_rate": 3.730571992118462e-05, "loss": 0.56, "step": 316},
    {"epoch": 0.40576, "grad_norm": 0.2831296008226712, "learning_rate": 3.720820923024778e-05, "loss": 0.556, "step": 317},
    {"epoch": 0.40704, "grad_norm": 0.24842153386433, "learning_rate": 3.711045404049507e-05, "loss": 0.5615, "step": 318},
    {"epoch": 0.40832, "grad_norm": 0.25137316651759817, "learning_rate": 3.701245630970979e-05, "loss": 0.5519, "step": 319},
    {"epoch": 0.4096, "grad_norm": 0.256842976111285, "learning_rate": 3.69142180005327e-05, "loss": 0.559, "step": 320},
    {"epoch": 0.41088, "grad_norm": 0.23856265603702867, "learning_rate": 3.681574108042274e-05, "loss": 0.5513, "step": 321},
    {"epoch": 0.41216, "grad_norm": 0.2636505871798519, "learning_rate": 3.6717027521617595e-05, "loss": 0.5553, "step": 322},
    {"epoch": 0.41344, "grad_norm": 0.23126506975873334, "learning_rate": 3.6618079301094216e-05, "loss": 0.5535, "step": 323},
    {"epoch": 0.41472, "grad_norm": 0.2765627545708339, "learning_rate": 3.6518898400529214e-05, "loss": 0.5727, "step": 324},
    {"epoch": 0.416, "grad_norm": 0.25065312946741247, "learning_rate": 3.6419486806259194e-05, "loss": 0.5642, "step": 325},
    {"epoch": 0.41728, "grad_norm": 0.24801934318992586, "learning_rate": 3.631984650924094e-05, "loss": 0.5489, "step": 326},
    {"epoch": 0.41856, "grad_norm": 0.2505101313201257, "learning_rate": 3.621997950501156e-05, "loss": 0.5575, "step": 327},
    {"epoch": 0.41984, "grad_norm": 0.22423685749771013, "learning_rate": 3.611988779364853e-05, "loss": 0.5468, "step": 328},
    {"epoch": 0.42112, "grad_norm": 0.2613413831880175, "learning_rate": 3.6019573379729643e-05, "loss": 0.5742, "step": 329},
    {"epoch": 0.4224, "grad_norm": 0.23270975616366224, "learning_rate": 3.591903827229282e-05, "loss": 0.567, "step": 330},
    {"epoch": 0.42368, "grad_norm": 0.22747265941762546, "learning_rate": 3.5818284484795904e-05, "loss": 0.5521, "step": 331},
    {"epoch": 0.42496, "grad_norm": 0.21305915578050127, "learning_rate": 3.5717314035076355e-05, "loss": 0.5507, "step": 332},
    {"epoch": 0.42624, "grad_norm": 0.22564541882328742, "learning_rate": 3.56161289453108e-05, "loss": 0.5612, "step": 333},
    {"epoch": 0.42752, "grad_norm": 0.24927047862972154, "learning_rate": 3.5514731241974544e-05, "loss": 0.5511, "step": 334},
    {"epoch": 0.4288, "grad_norm": 0.23391865199996986, "learning_rate": 3.5413122955801005e-05, "loss": 0.5638, "step": 335},
    {"epoch": 0.43008, "grad_norm": 0.2538820004383474, "learning_rate": 3.5311306121741015e-05, "loss": 0.5629, "step": 336},
    {"epoch": 0.43136, "grad_norm": 0.23071027016549392, "learning_rate": 3.5209282778922106e-05, "loss": 0.5524, "step": 337},
    {"epoch": 0.43264, "grad_norm": 0.2538902369228176, "learning_rate": 3.510705497060762e-05, "loss": 0.5589, "step": 338},
    {"epoch": 0.43392, "grad_norm": 0.2396787654237747, "learning_rate": 3.500462474415584e-05, "loss": 0.5583, "step": 339},
    {"epoch": 0.4352, "grad_norm": 0.24498876339943568, "learning_rate": 3.490199415097892e-05, "loss": 0.5586, "step": 340},
    {"epoch": 0.43648, "grad_norm": 0.22429707447384664, "learning_rate": 3.479916524650188e-05, "loss": 0.5591, "step": 341},
    {"epoch": 0.43776, "grad_norm": 0.2488615974495946, "learning_rate": 3.4696140090121376e-05, "loss": 0.5716, "step": 342},
    {"epoch": 0.43904, "grad_norm": 0.24601068839661924, "learning_rate": 3.459292074516449e-05, "loss": 0.572, "step": 343},
    {"epoch": 0.44032, "grad_norm": 0.24772654051656678, "learning_rate": 3.4489509278847414e-05, "loss": 0.5627, "step": 344},
    {"epoch": 0.4416, "grad_norm": 0.24320980786790716, "learning_rate": 3.4385907762234e-05, "loss": 0.556, "step": 345},
    {"epoch": 0.44288, "grad_norm": 0.2899880667030433, "learning_rate": 3.428211827019434e-05, "loss": 0.5625, "step": 346},
    {"epoch": 0.44416, "grad_norm": 0.24951619342805165, "learning_rate": 3.417814288136319e-05, "loss": 0.5606, "step": 347},
    {"epoch": 0.44544, "grad_norm": 0.29645948538693856, "learning_rate": 3.407398367809832e-05, "loss": 0.5696, "step": 348},
    {"epoch": 0.44672, "grad_norm": 0.2628573817303425, "learning_rate": 3.3969642746438836e-05, "loss": 0.5548, "step": 349},
    {"epoch": 0.448, "grad_norm": 0.25156314937857505, "learning_rate": 3.386512217606339e-05, "loss": 0.5584, "step": 350},
    {"epoch": 0.44928, "grad_norm": 0.24416044960818195, "learning_rate": 3.3760424060248344e-05, "loss": 0.5501, "step": 351},
    {"epoch": 0.45056, "grad_norm": 0.2508322157210064, "learning_rate": 3.365555049582582e-05, "loss": 0.5575, "step": 352},
    {"epoch": 0.45184, "grad_norm": 0.23612772269109591, "learning_rate": 3.355050358314172e-05, "loss": 0.5682, "step": 353},
    {"epoch": 0.45312, "grad_norm": 0.24541798896943803, "learning_rate": 3.3445285426013685e-05, "loss": 0.551, "step": 354},
    {"epoch": 0.4544, "grad_norm": 0.2234016182436274, "learning_rate": 3.3339898131688914e-05, "loss": 0.5591, "step": 355},
    {"epoch": 0.45568, "grad_norm": 0.24455388410655837, "learning_rate": 3.323434381080199e-05, "loss": 0.5542, "step": 356},
    {"epoch": 0.45696, "grad_norm": 0.2447461284103842, "learning_rate": 3.312862457733263e-05, "loss": 0.5689, "step": 357},
    {"epoch": 0.45824, "grad_norm": 0.2611063437561967, "learning_rate": 3.302274254856329e-05, "loss": 0.5564, "step": 358},
    {"epoch": 0.45952, "grad_norm": 0.2609646712338443, "learning_rate": 3.2916699845036816e-05, "loss": 0.5609, "step": 359},
    {"epoch": 0.4608, "grad_norm": 0.2668094661495938, "learning_rate": 3.281049859051394e-05, "loss": 0.55, "step": 360},
    {"epoch": 0.46208, "grad_norm": 0.23869480950176758, "learning_rate": 3.270414091193077e-05, "loss": 0.5613, "step": 361},
    {"epoch": 0.46336, "grad_norm": 0.24021036759397824, "learning_rate": 3.2597628939356175e-05, "loss": 0.5431, "step": 362},
    {"epoch": 0.46464, "grad_norm": 0.22964918725760114, "learning_rate": 3.2490964805949145e-05, "loss": 0.5568, "step": 363},
    {"epoch": 0.46592, "grad_norm": 0.22304779121360874, "learning_rate": 3.238415064791603e-05, "loss": 0.561, "step": 364},
    {"epoch": 0.4672, "grad_norm": 0.23921210910709523, "learning_rate": 3.227718860446782e-05, "loss": 0.5457, "step": 365},
    {"epoch": 0.46848, "grad_norm": 0.2209452744046842, "learning_rate": 3.217008081777726e-05, "loss": 0.5543, "step": 366},
    {"epoch": 0.46976, "grad_norm": 0.24033224570485728, "learning_rate": 3.206282943293593e-05, "loss": 0.5491, "step": 367},
    {"epoch": 0.47104, "grad_norm": 0.23799116455210376, "learning_rate": 3.195543659791132e-05, "loss": 0.5473, "step": 368},
    {"epoch": 0.47232, "grad_norm": 0.22503904067880973, "learning_rate": 3.1847904463503816e-05, "loss": 0.5633, "step": 369},
    {"epoch": 0.4736, "grad_norm": 0.22994992202231845, "learning_rate": 3.17402351833036e-05, "loss": 0.56, "step": 370},
    {"epoch": 0.47488, "grad_norm": 0.2454490715349298, "learning_rate": 3.163243091364752e-05, "loss": 0.5502, "step": 371},
    {"epoch": 0.47616, "grad_norm": 0.2567184232746229, "learning_rate": 3.152449381357593e-05, "loss": 0.5606, "step": 372},
    {"epoch": 0.47744, "grad_norm": 0.22104709877502515, "learning_rate": 3.141642604478942e-05, "loss": 0.5452, "step": 373},
    {"epoch": 0.47872, "grad_norm": 0.254791340469147, "learning_rate": 3.130822977160554e-05, "loss": 0.5575, "step": 374},
    {"epoch": 0.48, "grad_norm": 0.2172815217297066, "learning_rate": 3.119990716091546e-05, "loss": 0.5592, "step": 375},
    {"epoch": 0.48128, "grad_norm": 0.23931030435414483, "learning_rate": 3.109146038214055e-05, "loss": 0.5473, "step": 376},
    {"epoch": 0.48256, "grad_norm": 0.2290268854716801, "learning_rate": 3.098289160718895e-05, "loss": 0.5645, "step": 377},
    {"epoch": 0.48384, "grad_norm": 0.23603315462362215, "learning_rate": 3.087420301041206e-05, "loss": 0.5408, "step": 378},
    {"epoch": 0.48512, "grad_norm": 0.20930260887273594, "learning_rate": 3.076539676856101e-05, "loss": 0.5497, "step": 379},
    {"epoch": 0.4864, "grad_norm": 0.20115083009307477, "learning_rate": 3.065647506074306e-05, "loss": 0.565, "step": 380},
    {"epoch": 0.48768, "grad_norm": 0.24482468983205832, "learning_rate": 3.054744006837794e-05, "loss": 0.5682, "step": 381},
    {"epoch": 0.48896, "grad_norm": 0.21118406075167703, "learning_rate": 3.0438293975154186e-05, "loss": 0.5477, "step": 382},
    {"epoch": 0.49024, "grad_norm": 0.23616813058785022, "learning_rate": 3.03290389669854e-05, "loss": 0.5563, "step": 383},
    {"epoch": 0.49152, "grad_norm": 0.21365522272880177, "learning_rate": 3.021967723196647e-05, "loss": 0.5507, "step": 384},
    {"epoch": 0.4928, "grad_norm": 0.22727170936570085, "learning_rate": 3.0110210960329725e-05, "loss": 0.5603, "step": 385},
    {"epoch": 0.49408, "grad_norm": 0.20731397878172675, "learning_rate": 3.0000642344401113e-05, "loss": 0.5563, "step": 386},
    {"epoch": 0.49536, "grad_norm": 0.21475557099958575, "learning_rate": 2.9890973578556268e-05, "loss": 0.552, "step": 387},
    {"epoch": 0.49664, "grad_norm": 0.26877099196744664, "learning_rate": 2.978120685917656e-05, "loss": 0.5409, "step": 388},
    {"epoch": 0.49792, "grad_norm": 0.21789486858704812, "learning_rate": 2.9671344384605127e-05, "loss": 0.5486, "step": 389},
    {"epoch": 0.4992, "grad_norm": 0.23650494006372377, "learning_rate": 2.956138835510282e-05, "loss": 0.5454, "step": 390},
    {"epoch": 0.50048, "grad_norm": 0.22781069793383732, "learning_rate": 2.945134097280417e-05, "loss": 0.5615, "step": 391},
    {"epoch": 0.50176, "grad_norm": 0.2365389525743412, "learning_rate": 2.9341204441673266e-05, "loss": 0.5544, "step": 392},
    {"epoch": 0.50304, "grad_norm": 0.21603949855780968, "learning_rate": 2.9230980967459593e-05, "loss": 0.569, "step": 393},
    {"epoch": 0.50432, "grad_norm": 0.2037144247370564, "learning_rate": 2.9120672757653916e-05, "loss": 0.5497, "step": 394},
    {"epoch": 0.5056, "grad_norm": 0.23323915671730888, "learning_rate": 2.9010282021444008e-05, "loss": 0.5583, "step": 395},
    {"epoch": 0.50688, "grad_norm": 0.21232243650279164, "learning_rate": 2.8899810969670448e-05, "loss": 0.5632, "step": 396},
|
{ |
|
"epoch": 0.50816, |
|
"grad_norm": 0.23946451797290258, |
|
"learning_rate": 2.8789261814782316e-05, |
|
"loss": 0.5593, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.50944, |
|
"grad_norm": 0.2277720908380823, |
|
"learning_rate": 2.8678636770792906e-05, |
|
"loss": 0.5557, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.51072, |
|
"grad_norm": 0.2121131449498051, |
|
"learning_rate": 2.856793805323536e-05, |
|
"loss": 0.5494, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.512, |
|
"grad_norm": 0.2257692000346662, |
|
"learning_rate": 2.845716787911833e-05, |
|
"loss": 0.5669, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.51328, |
|
"grad_norm": 0.23206067761113425, |
|
"learning_rate": 2.8346328466881545e-05, |
|
"loss": 0.5587, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.51456, |
|
"grad_norm": 0.22901702627476864, |
|
"learning_rate": 2.8235422036351382e-05, |
|
"loss": 0.5719, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.51584, |
|
"grad_norm": 0.2284208784515934, |
|
"learning_rate": 2.812445080869646e-05, |
|
"loss": 0.5608, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.51712, |
|
"grad_norm": 0.21851283140724692, |
|
"learning_rate": 2.8013417006383076e-05, |
|
"loss": 0.5567, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.5184, |
|
"grad_norm": 0.21275077693373082, |
|
"learning_rate": 2.7902322853130757e-05, |
|
"loss": 0.545, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.51968, |
|
"grad_norm": 0.2251800050901655, |
|
"learning_rate": 2.77911705738677e-05, |
|
"loss": 0.555, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.52096, |
|
"grad_norm": 0.21241829251347838, |
|
"learning_rate": 2.7679962394686198e-05, |
|
"loss": 0.5581, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.52224, |
|
"grad_norm": 0.21703951180685088, |
|
"learning_rate": 2.756870054279811e-05, |
|
"loss": 0.5488, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.52352, |
|
"grad_norm": 0.22069571037639818, |
|
"learning_rate": 2.745738724649018e-05, |
|
"loss": 0.5535, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.5248, |
|
"grad_norm": 0.25932488146409416, |
|
"learning_rate": 2.7346024735079486e-05, |
|
"loss": 0.5583, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.52608, |
|
"grad_norm": 0.20727956981757625, |
|
"learning_rate": 2.7234615238868732e-05, |
|
"loss": 0.5632, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.52736, |
|
"grad_norm": 0.24066945021398153, |
|
"learning_rate": 2.712316098910162e-05, |
|
"loss": 0.563, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.52864, |
|
"grad_norm": 0.25008559453154683, |
|
"learning_rate": 2.7011664217918154e-05, |
|
"loss": 0.5488, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.52992, |
|
"grad_norm": 0.23013581555834695, |
|
"learning_rate": 2.6900127158309903e-05, |
|
"loss": 0.5633, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.5312, |
|
"grad_norm": 0.22224479213901044, |
|
"learning_rate": 2.6788552044075344e-05, |
|
"loss": 0.549, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.53248, |
|
"grad_norm": 0.2654591019527864, |
|
"learning_rate": 2.667694110977506e-05, |
|
"loss": 0.5574, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.53376, |
|
"grad_norm": 0.19545471307044301, |
|
"learning_rate": 2.656529659068705e-05, |
|
"loss": 0.5488, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.53504, |
|
"grad_norm": 0.20254148474738726, |
|
"learning_rate": 2.6453620722761896e-05, |
|
"loss": 0.5638, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.53632, |
|
"grad_norm": 0.22685700497041825, |
|
"learning_rate": 2.6341915742578037e-05, |
|
"loss": 0.5414, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.5376, |
|
"grad_norm": 0.20942540533409634, |
|
"learning_rate": 2.6230183887296955e-05, |
|
"loss": 0.5565, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.53888, |
|
"grad_norm": 0.21464639120298495, |
|
"learning_rate": 2.6118427394618357e-05, |
|
"loss": 0.5561, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.54016, |
|
"grad_norm": 0.22217802407368148, |
|
"learning_rate": 2.600664850273538e-05, |
|
"loss": 0.5533, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.54144, |
|
"grad_norm": 0.18275865926888668, |
|
"learning_rate": 2.5894849450289764e-05, |
|
"loss": 0.5488, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.54272, |
|
"grad_norm": 0.2167014210634381, |
|
"learning_rate": 2.5783032476327007e-05, |
|
"loss": 0.5461, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.544, |
|
"grad_norm": 0.2034252786956083, |
|
"learning_rate": 2.5671199820251534e-05, |
|
"loss": 0.5509, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.54528, |
|
"grad_norm": 0.18569564751312728, |
|
"learning_rate": 2.5559353721781832e-05, |
|
"loss": 0.5478, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.54656, |
|
"grad_norm": 0.20022896045042796, |
|
"learning_rate": 2.544749642090561e-05, |
|
"loss": 0.5477, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.54784, |
|
"grad_norm": 0.21037283126869577, |
|
"learning_rate": 2.5335630157834937e-05, |
|
"loss": 0.5539, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.54912, |
|
"grad_norm": 0.19972461705925887, |
|
"learning_rate": 2.522375717296137e-05, |
|
"loss": 0.5569, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.5504, |
|
"grad_norm": 0.19005063735053182, |
|
"learning_rate": 2.5111879706811087e-05, |
|
"loss": 0.5457, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.55168, |
|
"grad_norm": 0.21356314651647584, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.5459, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.55296, |
|
"grad_norm": 0.20945885149378243, |
|
"learning_rate": 2.4888120293188916e-05, |
|
"loss": 0.5492, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.55424, |
|
"grad_norm": 0.2230850015397349, |
|
"learning_rate": 2.4776242827038636e-05, |
|
"loss": 0.5545, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.55552, |
|
"grad_norm": 0.23589759701529497, |
|
"learning_rate": 2.4664369842165068e-05, |
|
"loss": 0.5412, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.5568, |
|
"grad_norm": 0.22581333235197817, |
|
"learning_rate": 2.4552503579094397e-05, |
|
"loss": 0.5435, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.55808, |
|
"grad_norm": 0.24919774658733163, |
|
"learning_rate": 2.4440646278218177e-05, |
|
"loss": 0.545, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.55936, |
|
"grad_norm": 0.2190295213590674, |
|
"learning_rate": 2.4328800179748475e-05, |
|
"loss": 0.5408, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.56064, |
|
"grad_norm": 0.2628486805976341, |
|
"learning_rate": 2.4216967523673e-05, |
|
"loss": 0.5655, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.56192, |
|
"grad_norm": 0.25252172819470975, |
|
"learning_rate": 2.4105150549710238e-05, |
|
"loss": 0.5552, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.5632, |
|
"grad_norm": 0.20229952269202467, |
|
"learning_rate": 2.399335149726463e-05, |
|
"loss": 0.5429, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.56448, |
|
"grad_norm": 0.24469450227466819, |
|
"learning_rate": 2.388157260538165e-05, |
|
"loss": 0.5582, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.56576, |
|
"grad_norm": 0.21164210405032619, |
|
"learning_rate": 2.3769816112703047e-05, |
|
"loss": 0.5506, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.56704, |
|
"grad_norm": 0.22950726772051805, |
|
"learning_rate": 2.365808425742196e-05, |
|
"loss": 0.5497, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.56832, |
|
"grad_norm": 0.23336024035401096, |
|
"learning_rate": 2.3546379277238107e-05, |
|
"loss": 0.5415, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.5696, |
|
"grad_norm": 0.19765357154592897, |
|
"learning_rate": 2.3434703409312954e-05, |
|
"loss": 0.5423, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.57088, |
|
"grad_norm": 0.20108815980408287, |
|
"learning_rate": 2.3323058890224938e-05, |
|
"loss": 0.5543, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.57216, |
|
"grad_norm": 0.20801363546967222, |
|
"learning_rate": 2.321144795592467e-05, |
|
"loss": 0.5579, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.57344, |
|
"grad_norm": 0.19116838604162356, |
|
"learning_rate": 2.3099872841690103e-05, |
|
"loss": 0.5418, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.57472, |
|
"grad_norm": 0.18998557353906567, |
|
"learning_rate": 2.2988335782081855e-05, |
|
"loss": 0.5575, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.576, |
|
"grad_norm": 0.20390602126403667, |
|
"learning_rate": 2.2876839010898377e-05, |
|
"loss": 0.5517, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.57728, |
|
"grad_norm": 0.20273804136737242, |
|
"learning_rate": 2.2765384761131274e-05, |
|
"loss": 0.5512, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.57856, |
|
"grad_norm": 0.2323345427375102, |
|
"learning_rate": 2.265397526492052e-05, |
|
"loss": 0.5552, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.57984, |
|
"grad_norm": 0.21618001641053508, |
|
"learning_rate": 2.2542612753509823e-05, |
|
"loss": 0.5497, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.58112, |
|
"grad_norm": 0.2089693691856067, |
|
"learning_rate": 2.24312994572019e-05, |
|
"loss": 0.5437, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.5824, |
|
"grad_norm": 0.21321327766142864, |
|
"learning_rate": 2.2320037605313808e-05, |
|
"loss": 0.5457, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.58368, |
|
"grad_norm": 0.1866665167284803, |
|
"learning_rate": 2.2208829426132307e-05, |
|
"loss": 0.5589, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.58496, |
|
"grad_norm": 0.19549868239805784, |
|
"learning_rate": 2.2097677146869242e-05, |
|
"loss": 0.5428, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.58624, |
|
"grad_norm": 0.21465688730228774, |
|
"learning_rate": 2.1986582993616926e-05, |
|
"loss": 0.5409, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.58752, |
|
"grad_norm": 0.19658929521150698, |
|
"learning_rate": 2.1875549191303545e-05, |
|
"loss": 0.5464, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.5888, |
|
"grad_norm": 0.198475793396338, |
|
"learning_rate": 2.1764577963648614e-05, |
|
"loss": 0.5468, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.59008, |
|
"grad_norm": 0.21158010098295732, |
|
"learning_rate": 2.1653671533118468e-05, |
|
"loss": 0.5537, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.59136, |
|
"grad_norm": 0.21320070068715855, |
|
"learning_rate": 2.154283212088168e-05, |
|
"loss": 0.5486, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.59264, |
|
"grad_norm": 0.2212774837957101, |
|
"learning_rate": 2.1432061946764644e-05, |
|
"loss": 0.5381, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.59392, |
|
"grad_norm": 0.19109434465910716, |
|
"learning_rate": 2.1321363229207096e-05, |
|
"loss": 0.5538, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.5952, |
|
"grad_norm": 0.194950360975785, |
|
"learning_rate": 2.121073818521769e-05, |
|
"loss": 0.5539, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.59648, |
|
"grad_norm": 0.20628175723560369, |
|
"learning_rate": 2.1100189030329558e-05, |
|
"loss": 0.5414, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.59776, |
|
"grad_norm": 0.19027258987716833, |
|
"learning_rate": 2.098971797855599e-05, |
|
"loss": 0.5465, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.59904, |
|
"grad_norm": 0.1968353186636271, |
|
"learning_rate": 2.0879327242346093e-05, |
|
"loss": 0.5429, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.60032, |
|
"grad_norm": 0.19197267253975747, |
|
"learning_rate": 2.0769019032540416e-05, |
|
"loss": 0.5576, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.6016, |
|
"grad_norm": 0.1989897674210096, |
|
"learning_rate": 2.0658795558326743e-05, |
|
"loss": 0.5421, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.60288, |
|
"grad_norm": 0.2081969628582826, |
|
"learning_rate": 2.054865902719584e-05, |
|
"loss": 0.567, |
|
"step": 471 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 781, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 157, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 584441230589952.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |