|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 538240, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.018579072532699168, |
|
"grad_norm": 3.261758327484131, |
|
"learning_rate": 4.995355231866826e-05, |
|
"loss": 7.9049, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.037158145065398336, |
|
"grad_norm": 2.7945966720581055, |
|
"learning_rate": 4.99071046373365e-05, |
|
"loss": 7.313, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.0557372175980975, |
|
"grad_norm": 3.66495680809021, |
|
"learning_rate": 4.986065695600476e-05, |
|
"loss": 7.0529, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.07431629013079667, |
|
"grad_norm": 3.696575880050659, |
|
"learning_rate": 4.981420927467301e-05, |
|
"loss": 6.8319, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.09289536266349584, |
|
"grad_norm": 4.190186977386475, |
|
"learning_rate": 4.976776159334126e-05, |
|
"loss": 6.5888, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.111474435196195, |
|
"grad_norm": 4.945929050445557, |
|
"learning_rate": 4.972131391200951e-05, |
|
"loss": 6.3799, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.13005350772889418, |
|
"grad_norm": 4.898506164550781, |
|
"learning_rate": 4.967486623067777e-05, |
|
"loss": 6.1872, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.14863258026159334, |
|
"grad_norm": 4.006326675415039, |
|
"learning_rate": 4.962841854934602e-05, |
|
"loss": 6.0257, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.1672116527942925, |
|
"grad_norm": 5.2170209884643555, |
|
"learning_rate": 4.958197086801427e-05, |
|
"loss": 5.8457, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.18579072532699167, |
|
"grad_norm": 5.515870094299316, |
|
"learning_rate": 4.953552318668253e-05, |
|
"loss": 5.7231, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.20436979785969084, |
|
"grad_norm": 6.801781177520752, |
|
"learning_rate": 4.948907550535077e-05, |
|
"loss": 5.5989, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.22294887039239, |
|
"grad_norm": 5.6380205154418945, |
|
"learning_rate": 4.944262782401903e-05, |
|
"loss": 5.4627, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.24152794292508917, |
|
"grad_norm": 4.960023880004883, |
|
"learning_rate": 4.939618014268728e-05, |
|
"loss": 5.3395, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.26010701545778836, |
|
"grad_norm": 4.814383506774902, |
|
"learning_rate": 4.934973246135553e-05, |
|
"loss": 5.2441, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.2786860879904875, |
|
"grad_norm": 5.070895195007324, |
|
"learning_rate": 4.930328478002379e-05, |
|
"loss": 5.1584, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.2972651605231867, |
|
"grad_norm": 4.370517730712891, |
|
"learning_rate": 4.925683709869204e-05, |
|
"loss": 5.08, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.3158442330558858, |
|
"grad_norm": 5.01335334777832, |
|
"learning_rate": 4.921038941736029e-05, |
|
"loss": 5.0119, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.334423305588585, |
|
"grad_norm": 5.798189163208008, |
|
"learning_rate": 4.916394173602854e-05, |
|
"loss": 4.9469, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.3530023781212842, |
|
"grad_norm": 5.567455291748047, |
|
"learning_rate": 4.9117494054696796e-05, |
|
"loss": 4.8906, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.37158145065398335, |
|
"grad_norm": 5.719528675079346, |
|
"learning_rate": 4.907104637336504e-05, |
|
"loss": 4.826, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.39016052318668254, |
|
"grad_norm": 5.381674289703369, |
|
"learning_rate": 4.90245986920333e-05, |
|
"loss": 4.7627, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.4087395957193817, |
|
"grad_norm": 5.749002933502197, |
|
"learning_rate": 4.8978151010701554e-05, |
|
"loss": 4.7247, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.42731866825208087, |
|
"grad_norm": 5.553402423858643, |
|
"learning_rate": 4.89317033293698e-05, |
|
"loss": 4.648, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.44589774078478, |
|
"grad_norm": 5.483209133148193, |
|
"learning_rate": 4.8885255648038055e-05, |
|
"loss": 4.5921, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.4644768133174792, |
|
"grad_norm": 5.7984747886657715, |
|
"learning_rate": 4.8838807966706305e-05, |
|
"loss": 4.5489, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.48305588585017833, |
|
"grad_norm": 5.168997287750244, |
|
"learning_rate": 4.8792360285374556e-05, |
|
"loss": 4.5258, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.5016349583828775, |
|
"grad_norm": 5.243541240692139, |
|
"learning_rate": 4.8745912604042806e-05, |
|
"loss": 4.4826, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.5202140309155767, |
|
"grad_norm": 5.172119140625, |
|
"learning_rate": 4.8699464922711064e-05, |
|
"loss": 4.4446, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.5387931034482759, |
|
"grad_norm": 4.617650032043457, |
|
"learning_rate": 4.8653017241379314e-05, |
|
"loss": 4.4262, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.557372175980975, |
|
"grad_norm": 5.853124618530273, |
|
"learning_rate": 4.8606569560047565e-05, |
|
"loss": 4.3786, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.5759512485136742, |
|
"grad_norm": 4.6473493576049805, |
|
"learning_rate": 4.8560121878715815e-05, |
|
"loss": 4.3497, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.5945303210463734, |
|
"grad_norm": 5.469015121459961, |
|
"learning_rate": 4.8513674197384066e-05, |
|
"loss": 4.2946, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.6131093935790726, |
|
"grad_norm": 4.9087371826171875, |
|
"learning_rate": 4.846722651605232e-05, |
|
"loss": 4.2847, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.6316884661117717, |
|
"grad_norm": 5.479755878448486, |
|
"learning_rate": 4.842077883472057e-05, |
|
"loss": 4.2635, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.6502675386444708, |
|
"grad_norm": 5.843453407287598, |
|
"learning_rate": 4.8374331153388824e-05, |
|
"loss": 4.2108, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.66884661117717, |
|
"grad_norm": 5.5419535636901855, |
|
"learning_rate": 4.832788347205708e-05, |
|
"loss": 4.2187, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.6874256837098692, |
|
"grad_norm": 5.9003376960754395, |
|
"learning_rate": 4.8281435790725325e-05, |
|
"loss": 4.16, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.7060047562425684, |
|
"grad_norm": 5.591574668884277, |
|
"learning_rate": 4.823498810939358e-05, |
|
"loss": 4.1433, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.7245838287752675, |
|
"grad_norm": 5.295060634613037, |
|
"learning_rate": 4.818854042806183e-05, |
|
"loss": 4.1182, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.7431629013079667, |
|
"grad_norm": 5.101735591888428, |
|
"learning_rate": 4.814209274673008e-05, |
|
"loss": 4.1155, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.7617419738406659, |
|
"grad_norm": 5.852224349975586, |
|
"learning_rate": 4.809564506539834e-05, |
|
"loss": 4.0749, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.7803210463733651, |
|
"grad_norm": 5.916712760925293, |
|
"learning_rate": 4.804919738406659e-05, |
|
"loss": 4.0554, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.7989001189060642, |
|
"grad_norm": 5.017261505126953, |
|
"learning_rate": 4.800274970273484e-05, |
|
"loss": 4.0196, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.8174791914387634, |
|
"grad_norm": 5.785404682159424, |
|
"learning_rate": 4.795630202140309e-05, |
|
"loss": 4.0196, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.8360582639714625, |
|
"grad_norm": 5.758474826812744, |
|
"learning_rate": 4.790985434007135e-05, |
|
"loss": 4.0027, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.8546373365041617, |
|
"grad_norm": 6.120078086853027, |
|
"learning_rate": 4.786340665873959e-05, |
|
"loss": 3.9577, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.8732164090368609, |
|
"grad_norm": 6.130490779876709, |
|
"learning_rate": 4.781695897740785e-05, |
|
"loss": 3.9507, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.89179548156956, |
|
"grad_norm": 4.95521354675293, |
|
"learning_rate": 4.77705112960761e-05, |
|
"loss": 3.9216, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.9103745541022592, |
|
"grad_norm": 5.775145530700684, |
|
"learning_rate": 4.772406361474435e-05, |
|
"loss": 3.9177, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.9289536266349584, |
|
"grad_norm": 5.804774761199951, |
|
"learning_rate": 4.767761593341261e-05, |
|
"loss": 3.8925, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.9475326991676576, |
|
"grad_norm": 5.883671760559082, |
|
"learning_rate": 4.763116825208086e-05, |
|
"loss": 3.8722, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.9661117717003567, |
|
"grad_norm": 5.462569236755371, |
|
"learning_rate": 4.758472057074911e-05, |
|
"loss": 3.8502, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.9846908442330559, |
|
"grad_norm": 5.68014669418335, |
|
"learning_rate": 4.753827288941736e-05, |
|
"loss": 3.8339, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 1.003269916765755, |
|
"grad_norm": 5.658189296722412, |
|
"learning_rate": 4.749182520808562e-05, |
|
"loss": 3.8221, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 1.0218489892984541, |
|
"grad_norm": 5.337306976318359, |
|
"learning_rate": 4.744537752675387e-05, |
|
"loss": 3.8099, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 1.0404280618311534, |
|
"grad_norm": 5.810146808624268, |
|
"learning_rate": 4.739892984542212e-05, |
|
"loss": 3.7751, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.0590071343638525, |
|
"grad_norm": 5.76528263092041, |
|
"learning_rate": 4.7352482164090375e-05, |
|
"loss": 3.7551, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 1.0775862068965518, |
|
"grad_norm": 6.346560955047607, |
|
"learning_rate": 4.730603448275862e-05, |
|
"loss": 3.7454, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 1.096165279429251, |
|
"grad_norm": 5.019473552703857, |
|
"learning_rate": 4.7259586801426876e-05, |
|
"loss": 3.74, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 1.11474435196195, |
|
"grad_norm": 5.211459636688232, |
|
"learning_rate": 4.721313912009513e-05, |
|
"loss": 3.7018, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.1333234244946493, |
|
"grad_norm": 5.713869571685791, |
|
"learning_rate": 4.716669143876338e-05, |
|
"loss": 3.6834, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 1.1519024970273484, |
|
"grad_norm": 5.4642744064331055, |
|
"learning_rate": 4.7120243757431635e-05, |
|
"loss": 3.7053, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 1.1704815695600477, |
|
"grad_norm": 6.932915687561035, |
|
"learning_rate": 4.7073796076099885e-05, |
|
"loss": 3.6719, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 1.1890606420927468, |
|
"grad_norm": 5.861956596374512, |
|
"learning_rate": 4.7027348394768136e-05, |
|
"loss": 3.6613, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.2076397146254458, |
|
"grad_norm": 5.654363632202148, |
|
"learning_rate": 4.6980900713436386e-05, |
|
"loss": 3.6413, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 1.2262187871581451, |
|
"grad_norm": 5.2097392082214355, |
|
"learning_rate": 4.693445303210464e-05, |
|
"loss": 3.6603, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 1.2447978596908442, |
|
"grad_norm": 5.455073833465576, |
|
"learning_rate": 4.688800535077289e-05, |
|
"loss": 3.6455, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 1.2633769322235433, |
|
"grad_norm": 5.670316219329834, |
|
"learning_rate": 4.6841557669441144e-05, |
|
"loss": 3.6029, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 1.2819560047562426, |
|
"grad_norm": 6.064113140106201, |
|
"learning_rate": 4.6795109988109395e-05, |
|
"loss": 3.5978, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 1.3005350772889417, |
|
"grad_norm": 5.650447368621826, |
|
"learning_rate": 4.6748662306777645e-05, |
|
"loss": 3.5817, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 1.3191141498216408, |
|
"grad_norm": 7.115864276885986, |
|
"learning_rate": 4.67022146254459e-05, |
|
"loss": 3.5769, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 1.33769322235434, |
|
"grad_norm": 6.497522354125977, |
|
"learning_rate": 4.665576694411415e-05, |
|
"loss": 3.5825, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.3562722948870394, |
|
"grad_norm": 5.834658622741699, |
|
"learning_rate": 4.6609319262782404e-05, |
|
"loss": 3.555, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 1.3748513674197385, |
|
"grad_norm": 5.968478679656982, |
|
"learning_rate": 4.6562871581450654e-05, |
|
"loss": 3.5476, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 1.3934304399524375, |
|
"grad_norm": 5.435029983520508, |
|
"learning_rate": 4.651642390011891e-05, |
|
"loss": 3.518, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 1.4120095124851368, |
|
"grad_norm": 5.952866554260254, |
|
"learning_rate": 4.646997621878716e-05, |
|
"loss": 3.5201, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 1.430588585017836, |
|
"grad_norm": 6.440069675445557, |
|
"learning_rate": 4.642352853745541e-05, |
|
"loss": 3.5159, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 1.449167657550535, |
|
"grad_norm": 5.686422824859619, |
|
"learning_rate": 4.637708085612367e-05, |
|
"loss": 3.5054, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 1.4677467300832343, |
|
"grad_norm": 6.039205551147461, |
|
"learning_rate": 4.633063317479191e-05, |
|
"loss": 3.499, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 1.4863258026159334, |
|
"grad_norm": 5.929929256439209, |
|
"learning_rate": 4.628418549346017e-05, |
|
"loss": 3.5125, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.5049048751486325, |
|
"grad_norm": 6.127495288848877, |
|
"learning_rate": 4.623773781212842e-05, |
|
"loss": 3.4683, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 1.5234839476813318, |
|
"grad_norm": 5.202524662017822, |
|
"learning_rate": 4.619129013079667e-05, |
|
"loss": 3.4457, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 1.542063020214031, |
|
"grad_norm": 6.199319839477539, |
|
"learning_rate": 4.614484244946493e-05, |
|
"loss": 3.4489, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 1.56064209274673, |
|
"grad_norm": 5.948836803436279, |
|
"learning_rate": 4.609839476813318e-05, |
|
"loss": 3.4412, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.5792211652794292, |
|
"grad_norm": 6.036122798919678, |
|
"learning_rate": 4.605194708680143e-05, |
|
"loss": 3.4259, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 1.5978002378121285, |
|
"grad_norm": 7.213582992553711, |
|
"learning_rate": 4.600549940546968e-05, |
|
"loss": 3.425, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 1.6163793103448276, |
|
"grad_norm": 5.579181671142578, |
|
"learning_rate": 4.595905172413794e-05, |
|
"loss": 3.4094, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 1.6349583828775267, |
|
"grad_norm": 6.071746349334717, |
|
"learning_rate": 4.591260404280618e-05, |
|
"loss": 3.3886, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 1.653537455410226, |
|
"grad_norm": 6.017687797546387, |
|
"learning_rate": 4.586615636147444e-05, |
|
"loss": 3.3987, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 1.672116527942925, |
|
"grad_norm": 6.2989349365234375, |
|
"learning_rate": 4.581970868014269e-05, |
|
"loss": 3.3987, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 1.6906956004756242, |
|
"grad_norm": 5.678338527679443, |
|
"learning_rate": 4.577326099881094e-05, |
|
"loss": 3.3755, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 1.7092746730083235, |
|
"grad_norm": 6.020495891571045, |
|
"learning_rate": 4.57268133174792e-05, |
|
"loss": 3.3591, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 1.7278537455410226, |
|
"grad_norm": 5.941638946533203, |
|
"learning_rate": 4.568036563614744e-05, |
|
"loss": 3.3574, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 1.7464328180737216, |
|
"grad_norm": 6.722168922424316, |
|
"learning_rate": 4.56339179548157e-05, |
|
"loss": 3.3746, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 1.765011890606421, |
|
"grad_norm": 6.632647514343262, |
|
"learning_rate": 4.558747027348395e-05, |
|
"loss": 3.3535, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 1.7835909631391202, |
|
"grad_norm": 6.448876857757568, |
|
"learning_rate": 4.55410225921522e-05, |
|
"loss": 3.3581, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.802170035671819, |
|
"grad_norm": 5.348858833312988, |
|
"learning_rate": 4.5494574910820456e-05, |
|
"loss": 3.3432, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 1.8207491082045184, |
|
"grad_norm": 6.1672186851501465, |
|
"learning_rate": 4.5448127229488707e-05, |
|
"loss": 3.3422, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 1.8393281807372177, |
|
"grad_norm": 5.889304161071777, |
|
"learning_rate": 4.540167954815696e-05, |
|
"loss": 3.3224, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 1.8579072532699168, |
|
"grad_norm": 6.291742324829102, |
|
"learning_rate": 4.535523186682521e-05, |
|
"loss": 3.3133, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 1.8764863258026159, |
|
"grad_norm": 5.806668758392334, |
|
"learning_rate": 4.5308784185493465e-05, |
|
"loss": 3.3243, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 1.8950653983353152, |
|
"grad_norm": 6.051152229309082, |
|
"learning_rate": 4.5262336504161715e-05, |
|
"loss": 3.2947, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 1.9136444708680143, |
|
"grad_norm": 6.568633079528809, |
|
"learning_rate": 4.5215888822829966e-05, |
|
"loss": 3.2911, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 1.9322235434007133, |
|
"grad_norm": 6.369818210601807, |
|
"learning_rate": 4.516944114149822e-05, |
|
"loss": 3.2676, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 1.9508026159334126, |
|
"grad_norm": 5.4059014320373535, |
|
"learning_rate": 4.512299346016647e-05, |
|
"loss": 3.2633, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 1.9693816884661117, |
|
"grad_norm": 5.883274078369141, |
|
"learning_rate": 4.5076545778834724e-05, |
|
"loss": 3.2759, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 1.9879607609988108, |
|
"grad_norm": 6.672674179077148, |
|
"learning_rate": 4.5030098097502975e-05, |
|
"loss": 3.2794, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 2.00653983353151, |
|
"grad_norm": 6.007123947143555, |
|
"learning_rate": 4.4983650416171225e-05, |
|
"loss": 3.2476, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 2.0251189060642094, |
|
"grad_norm": 5.490503787994385, |
|
"learning_rate": 4.4937202734839475e-05, |
|
"loss": 3.2269, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 2.0436979785969083, |
|
"grad_norm": 6.020650386810303, |
|
"learning_rate": 4.489075505350773e-05, |
|
"loss": 3.2368, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 2.0622770511296076, |
|
"grad_norm": 6.575464248657227, |
|
"learning_rate": 4.484430737217598e-05, |
|
"loss": 3.1994, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 2.080856123662307, |
|
"grad_norm": 5.844559192657471, |
|
"learning_rate": 4.4797859690844234e-05, |
|
"loss": 3.1938, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 2.0994351961950057, |
|
"grad_norm": 5.892060279846191, |
|
"learning_rate": 4.475141200951249e-05, |
|
"loss": 3.205, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 2.118014268727705, |
|
"grad_norm": 5.993409156799316, |
|
"learning_rate": 4.4704964328180735e-05, |
|
"loss": 3.212, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 2.1365933412604043, |
|
"grad_norm": 6.564383029937744, |
|
"learning_rate": 4.465851664684899e-05, |
|
"loss": 3.1842, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 2.1551724137931036, |
|
"grad_norm": 5.989982604980469, |
|
"learning_rate": 4.461206896551724e-05, |
|
"loss": 3.1859, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 2.1737514863258025, |
|
"grad_norm": 5.895771503448486, |
|
"learning_rate": 4.456562128418549e-05, |
|
"loss": 3.1839, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 2.192330558858502, |
|
"grad_norm": 5.832091331481934, |
|
"learning_rate": 4.451917360285375e-05, |
|
"loss": 3.1728, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 2.210909631391201, |
|
"grad_norm": 5.480752468109131, |
|
"learning_rate": 4.4472725921522e-05, |
|
"loss": 3.1711, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 2.2294887039239, |
|
"grad_norm": 5.683358192443848, |
|
"learning_rate": 4.442627824019025e-05, |
|
"loss": 3.1669, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 2.2480677764565993, |
|
"grad_norm": 6.638919830322266, |
|
"learning_rate": 4.43798305588585e-05, |
|
"loss": 3.1677, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 2.2666468489892986, |
|
"grad_norm": 5.941629886627197, |
|
"learning_rate": 4.433338287752676e-05, |
|
"loss": 3.1506, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 2.2852259215219974, |
|
"grad_norm": 6.227372169494629, |
|
"learning_rate": 4.428693519619501e-05, |
|
"loss": 3.1607, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 2.3038049940546967, |
|
"grad_norm": 6.063544750213623, |
|
"learning_rate": 4.424048751486326e-05, |
|
"loss": 3.1477, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 2.322384066587396, |
|
"grad_norm": 5.8914618492126465, |
|
"learning_rate": 4.419403983353152e-05, |
|
"loss": 3.1362, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 2.3409631391200953, |
|
"grad_norm": 5.964859962463379, |
|
"learning_rate": 4.414759215219976e-05, |
|
"loss": 3.1282, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 2.359542211652794, |
|
"grad_norm": 5.622297763824463, |
|
"learning_rate": 4.410114447086802e-05, |
|
"loss": 3.1386, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 2.3781212841854935, |
|
"grad_norm": 6.728824138641357, |
|
"learning_rate": 4.405469678953627e-05, |
|
"loss": 3.1202, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 2.396700356718193, |
|
"grad_norm": 6.513198375701904, |
|
"learning_rate": 4.400824910820452e-05, |
|
"loss": 3.1455, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 2.4152794292508917, |
|
"grad_norm": 6.273243427276611, |
|
"learning_rate": 4.396180142687277e-05, |
|
"loss": 3.1143, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 2.433858501783591, |
|
"grad_norm": 5.384542465209961, |
|
"learning_rate": 4.391535374554103e-05, |
|
"loss": 3.1111, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 2.4524375743162903, |
|
"grad_norm": 5.742457866668701, |
|
"learning_rate": 4.386890606420928e-05, |
|
"loss": 3.1146, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 2.471016646848989, |
|
"grad_norm": 6.236721038818359, |
|
"learning_rate": 4.382245838287753e-05, |
|
"loss": 3.111, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 2.4895957193816884, |
|
"grad_norm": 6.027072429656982, |
|
"learning_rate": 4.3776010701545785e-05, |
|
"loss": 3.088, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 2.5081747919143877, |
|
"grad_norm": 6.66511869430542, |
|
"learning_rate": 4.372956302021403e-05, |
|
"loss": 3.1021, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 2.5267538644470866, |
|
"grad_norm": 5.52970552444458, |
|
"learning_rate": 4.3683115338882286e-05, |
|
"loss": 3.1001, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 2.545332936979786, |
|
"grad_norm": 6.7701897621154785, |
|
"learning_rate": 4.363666765755054e-05, |
|
"loss": 3.0905, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 2.563912009512485, |
|
"grad_norm": 5.972938537597656, |
|
"learning_rate": 4.359021997621879e-05, |
|
"loss": 3.0665, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 2.582491082045184, |
|
"grad_norm": 6.33815860748291, |
|
"learning_rate": 4.3543772294887044e-05, |
|
"loss": 3.0703, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 2.6010701545778834, |
|
"grad_norm": 5.83467435836792, |
|
"learning_rate": 4.3497324613555295e-05, |
|
"loss": 3.0804, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 2.6196492271105827, |
|
"grad_norm": 6.139744758605957, |
|
"learning_rate": 4.3450876932223545e-05, |
|
"loss": 3.0668, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 2.6382282996432815, |
|
"grad_norm": 7.028213977813721, |
|
"learning_rate": 4.3404429250891796e-05, |
|
"loss": 3.0549, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 2.656807372175981, |
|
"grad_norm": 5.353559970855713, |
|
"learning_rate": 4.335798156956005e-05, |
|
"loss": 3.0684, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 2.67538644470868, |
|
"grad_norm": 6.900554656982422, |
|
"learning_rate": 4.3311533888228304e-05, |
|
"loss": 3.035, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 2.6939655172413794, |
|
"grad_norm": 6.68520450592041, |
|
"learning_rate": 4.3265086206896554e-05, |
|
"loss": 3.0307, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 2.7125445897740788, |
|
"grad_norm": 6.080930233001709, |
|
"learning_rate": 4.321863852556481e-05, |
|
"loss": 3.0379, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 2.7311236623067776, |
|
"grad_norm": 5.922386646270752, |
|
"learning_rate": 4.3172190844233055e-05, |
|
"loss": 3.0393, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 2.749702734839477, |
|
"grad_norm": 6.372087001800537, |
|
"learning_rate": 4.312574316290131e-05, |
|
"loss": 3.0243, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 2.768281807372176, |
|
"grad_norm": 6.071821689605713, |
|
"learning_rate": 4.307929548156956e-05, |
|
"loss": 3.0283, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 2.786860879904875, |
|
"grad_norm": 6.012415409088135, |
|
"learning_rate": 4.3032847800237813e-05, |
|
"loss": 3.025, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 2.8054399524375744, |
|
"grad_norm": 6.770437717437744, |
|
"learning_rate": 4.2986400118906064e-05, |
|
"loss": 3.0242, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 2.8240190249702737, |
|
"grad_norm": 6.748111724853516, |
|
"learning_rate": 4.2939952437574314e-05, |
|
"loss": 3.016, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 2.8425980975029725, |
|
"grad_norm": 6.000352382659912, |
|
"learning_rate": 4.289350475624257e-05, |
|
"loss": 3.0208, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 2.861177170035672, |
|
"grad_norm": 6.079233646392822, |
|
"learning_rate": 4.284705707491082e-05, |
|
"loss": 3.0062, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 2.879756242568371, |
|
"grad_norm": 5.8158040046691895, |
|
"learning_rate": 4.280060939357907e-05, |
|
"loss": 3.0162, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 2.89833531510107, |
|
"grad_norm": 7.081645965576172, |
|
"learning_rate": 4.275416171224732e-05, |
|
"loss": 2.9933, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 2.9169143876337693, |
|
"grad_norm": 7.042798042297363, |
|
"learning_rate": 4.270771403091558e-05, |
|
"loss": 3.0055, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 2.9354934601664686, |
|
"grad_norm": 6.736599445343018, |
|
"learning_rate": 4.266126634958383e-05, |
|
"loss": 2.9964, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 2.9540725326991675, |
|
"grad_norm": 6.28444242477417, |
|
"learning_rate": 4.261481866825208e-05, |
|
"loss": 2.9943, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 2.972651605231867, |
|
"grad_norm": 6.56734561920166, |
|
"learning_rate": 4.256837098692034e-05, |
|
"loss": 2.9807, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 2.991230677764566, |
|
"grad_norm": 6.312285423278809, |
|
"learning_rate": 4.252192330558858e-05, |
|
"loss": 2.9975, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 3.009809750297265, |
|
"grad_norm": 5.604495048522949, |
|
"learning_rate": 4.247547562425684e-05, |
|
"loss": 2.964, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 3.0283888228299642, |
|
"grad_norm": 6.2611083984375, |
|
"learning_rate": 4.242902794292509e-05, |
|
"loss": 2.9411, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 3.0469678953626635, |
|
"grad_norm": 6.149163246154785, |
|
"learning_rate": 4.238258026159334e-05, |
|
"loss": 2.9448, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 3.065546967895363, |
|
"grad_norm": 6.137192249298096, |
|
"learning_rate": 4.23361325802616e-05, |
|
"loss": 2.9208, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 3.0841260404280617, |
|
"grad_norm": 5.697031497955322, |
|
"learning_rate": 4.228968489892985e-05, |
|
"loss": 2.9352, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 3.102705112960761, |
|
"grad_norm": 6.298037528991699, |
|
"learning_rate": 4.22432372175981e-05, |
|
"loss": 2.9318, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 3.1212841854934603, |
|
"grad_norm": 6.293707370758057, |
|
"learning_rate": 4.219678953626635e-05, |
|
"loss": 2.9484, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 3.139863258026159, |
|
"grad_norm": 6.098722457885742, |
|
"learning_rate": 4.215034185493461e-05, |
|
"loss": 2.923, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 3.1584423305588585, |
|
"grad_norm": 5.604320526123047, |
|
"learning_rate": 4.210389417360285e-05, |
|
"loss": 2.932, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 3.177021403091558, |
|
"grad_norm": 6.741579055786133, |
|
"learning_rate": 4.205744649227111e-05, |
|
"loss": 2.911, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 3.1956004756242566, |
|
"grad_norm": 6.246683120727539, |
|
"learning_rate": 4.201099881093936e-05, |
|
"loss": 2.9139, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 3.214179548156956, |
|
"grad_norm": 6.600460052490234, |
|
"learning_rate": 4.196455112960761e-05, |
|
"loss": 2.9329, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 3.2327586206896552, |
|
"grad_norm": 6.846024990081787, |
|
"learning_rate": 4.1918103448275866e-05, |
|
"loss": 2.9189, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 3.2513376932223546, |
|
"grad_norm": 6.301860332489014, |
|
"learning_rate": 4.1871655766944116e-05, |
|
"loss": 2.9191, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 3.2699167657550534, |
|
"grad_norm": 5.542537689208984, |
|
"learning_rate": 4.182520808561237e-05, |
|
"loss": 2.8991, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 3.2884958382877527, |
|
"grad_norm": 6.527828216552734, |
|
"learning_rate": 4.177876040428062e-05, |
|
"loss": 2.8959, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 3.307074910820452, |
|
"grad_norm": 6.696499824523926, |
|
"learning_rate": 4.1732312722948875e-05, |
|
"loss": 2.904, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 3.325653983353151, |
|
"grad_norm": 6.901641368865967, |
|
"learning_rate": 4.1685865041617125e-05, |
|
"loss": 2.8918, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 3.34423305588585, |
|
"grad_norm": 5.950034141540527, |
|
"learning_rate": 4.1639417360285376e-05, |
|
"loss": 2.8953, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 3.3628121284185495, |
|
"grad_norm": 6.489218235015869, |
|
"learning_rate": 4.159296967895363e-05, |
|
"loss": 2.8983, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 3.3813912009512483, |
|
"grad_norm": 7.06480073928833, |
|
"learning_rate": 4.154652199762188e-05, |
|
"loss": 2.9144, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 3.3999702734839476, |
|
"grad_norm": 6.297793388366699, |
|
"learning_rate": 4.1500074316290134e-05, |
|
"loss": 2.8853, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 3.418549346016647, |
|
"grad_norm": 6.4150919914245605, |
|
"learning_rate": 4.1453626634958384e-05, |
|
"loss": 2.8906, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 3.437128418549346, |
|
"grad_norm": 7.301102638244629, |
|
"learning_rate": 4.1407178953626635e-05, |
|
"loss": 2.8706, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 3.455707491082045, |
|
"grad_norm": 6.061220645904541, |
|
"learning_rate": 4.136073127229489e-05, |
|
"loss": 2.8732, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 3.4742865636147444, |
|
"grad_norm": 6.419704914093018, |
|
"learning_rate": 4.131428359096314e-05, |
|
"loss": 2.861, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 3.4928656361474433, |
|
"grad_norm": 7.272397994995117, |
|
"learning_rate": 4.126783590963139e-05, |
|
"loss": 2.8942, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 3.5114447086801426, |
|
"grad_norm": 6.250875949859619, |
|
"learning_rate": 4.1221388228299644e-05, |
|
"loss": 2.8639, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 3.530023781212842, |
|
"grad_norm": 6.624760150909424, |
|
"learning_rate": 4.11749405469679e-05, |
|
"loss": 2.8706, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 3.548602853745541, |
|
"grad_norm": 7.242002487182617, |
|
"learning_rate": 4.1128492865636145e-05, |
|
"loss": 2.8549, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 3.56718192627824, |
|
"grad_norm": 6.070115089416504, |
|
"learning_rate": 4.10820451843044e-05, |
|
"loss": 2.8564, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 3.5857609988109393, |
|
"grad_norm": 6.022694110870361, |
|
"learning_rate": 4.103559750297266e-05, |
|
"loss": 2.8637, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 3.6043400713436387, |
|
"grad_norm": 5.543400287628174, |
|
"learning_rate": 4.09891498216409e-05, |
|
"loss": 2.8219, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 3.622919143876338, |
|
"grad_norm": 6.441455841064453, |
|
"learning_rate": 4.094270214030916e-05, |
|
"loss": 2.844, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 3.641498216409037, |
|
"grad_norm": 6.443978786468506, |
|
"learning_rate": 4.089625445897741e-05, |
|
"loss": 2.8337, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 3.660077288941736, |
|
"grad_norm": 7.063666820526123, |
|
"learning_rate": 4.084980677764566e-05, |
|
"loss": 2.869, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 3.6786563614744354, |
|
"grad_norm": 6.34807825088501, |
|
"learning_rate": 4.080335909631391e-05, |
|
"loss": 2.8303, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 3.6972354340071343, |
|
"grad_norm": 6.020463466644287, |
|
"learning_rate": 4.075691141498217e-05, |
|
"loss": 2.8442, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 3.7158145065398336, |
|
"grad_norm": 6.808725357055664, |
|
"learning_rate": 4.071046373365042e-05, |
|
"loss": 2.8432, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 3.734393579072533, |
|
"grad_norm": 6.207636833190918, |
|
"learning_rate": 4.066401605231867e-05, |
|
"loss": 2.859, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 3.7529726516052317, |
|
"grad_norm": 6.0236616134643555, |
|
"learning_rate": 4.061756837098693e-05, |
|
"loss": 2.847, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 3.771551724137931, |
|
"grad_norm": 5.8015241622924805, |
|
"learning_rate": 4.057112068965517e-05, |
|
"loss": 2.8239, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 3.7901307966706304, |
|
"grad_norm": 6.354222297668457, |
|
"learning_rate": 4.052467300832343e-05, |
|
"loss": 2.8574, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 3.808709869203329, |
|
"grad_norm": 6.587215900421143, |
|
"learning_rate": 4.047822532699168e-05, |
|
"loss": 2.8354, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 3.8272889417360285, |
|
"grad_norm": 7.283754825592041, |
|
"learning_rate": 4.043177764565993e-05, |
|
"loss": 2.8218, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 3.845868014268728, |
|
"grad_norm": 6.165238857269287, |
|
"learning_rate": 4.0385329964328186e-05, |
|
"loss": 2.841, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 3.8644470868014267, |
|
"grad_norm": 6.120512008666992, |
|
"learning_rate": 4.033888228299644e-05, |
|
"loss": 2.8195, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 3.883026159334126, |
|
"grad_norm": 6.8183698654174805, |
|
"learning_rate": 4.029243460166469e-05, |
|
"loss": 2.8044, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 3.9016052318668253, |
|
"grad_norm": 5.847311973571777, |
|
"learning_rate": 4.024598692033294e-05, |
|
"loss": 2.8056, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 3.920184304399524, |
|
"grad_norm": 7.423314571380615, |
|
"learning_rate": 4.019953923900119e-05, |
|
"loss": 2.8164, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 3.9387633769322234, |
|
"grad_norm": 6.736742973327637, |
|
"learning_rate": 4.015309155766944e-05, |
|
"loss": 2.8099, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 3.9573424494649228, |
|
"grad_norm": 6.3364644050598145, |
|
"learning_rate": 4.0106643876337696e-05, |
|
"loss": 2.8138, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 3.9759215219976216, |
|
"grad_norm": 6.03089714050293, |
|
"learning_rate": 4.006019619500595e-05, |
|
"loss": 2.8161, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 3.994500594530321, |
|
"grad_norm": 7.099618911743164, |
|
"learning_rate": 4.00137485136742e-05, |
|
"loss": 2.7899, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 4.01307966706302, |
|
"grad_norm": 6.682999134063721, |
|
"learning_rate": 3.9967300832342454e-05, |
|
"loss": 2.7953, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 4.031658739595719, |
|
"grad_norm": 5.30817985534668, |
|
"learning_rate": 3.99208531510107e-05, |
|
"loss": 2.7662, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 4.050237812128419, |
|
"grad_norm": 6.152495384216309, |
|
"learning_rate": 3.9874405469678955e-05, |
|
"loss": 2.7628, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 4.068816884661118, |
|
"grad_norm": 6.075979232788086, |
|
"learning_rate": 3.9827957788347206e-05, |
|
"loss": 2.7805, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 4.0873959571938165, |
|
"grad_norm": 6.708266258239746, |
|
"learning_rate": 3.9781510107015456e-05, |
|
"loss": 2.7607, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 4.105975029726516, |
|
"grad_norm": 6.425528049468994, |
|
"learning_rate": 3.9735062425683714e-05, |
|
"loss": 2.7738, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 4.124554102259215, |
|
"grad_norm": 6.978008270263672, |
|
"learning_rate": 3.9688614744351964e-05, |
|
"loss": 2.7654, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 4.143133174791914, |
|
"grad_norm": 6.780577182769775, |
|
"learning_rate": 3.9642167063020215e-05, |
|
"loss": 2.7632, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 4.161712247324614, |
|
"grad_norm": 5.834601879119873, |
|
"learning_rate": 3.9595719381688465e-05, |
|
"loss": 2.7671, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 4.180291319857313, |
|
"grad_norm": 7.513933181762695, |
|
"learning_rate": 3.954927170035672e-05, |
|
"loss": 2.7738, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 4.1988703923900115, |
|
"grad_norm": 6.303833484649658, |
|
"learning_rate": 3.950282401902497e-05, |
|
"loss": 2.782, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 4.217449464922711, |
|
"grad_norm": 5.807947158813477, |
|
"learning_rate": 3.945637633769322e-05, |
|
"loss": 2.7434, |
|
"step": 113500 |
|
}, |
|
{ |
|
"epoch": 4.23602853745541, |
|
"grad_norm": 6.926473617553711, |
|
"learning_rate": 3.940992865636148e-05, |
|
"loss": 2.7458, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 4.25460760998811, |
|
"grad_norm": 6.764691352844238, |
|
"learning_rate": 3.9363480975029724e-05, |
|
"loss": 2.7357, |
|
"step": 114500 |
|
}, |
|
{ |
|
"epoch": 4.273186682520809, |
|
"grad_norm": 5.976272106170654, |
|
"learning_rate": 3.931703329369798e-05, |
|
"loss": 2.7473, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 4.2917657550535075, |
|
"grad_norm": 6.0660810470581055, |
|
"learning_rate": 3.927058561236623e-05, |
|
"loss": 2.7387, |
|
"step": 115500 |
|
}, |
|
{ |
|
"epoch": 4.310344827586207, |
|
"grad_norm": 6.600549221038818, |
|
"learning_rate": 3.922413793103448e-05, |
|
"loss": 2.7409, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 4.328923900118906, |
|
"grad_norm": 7.705731391906738, |
|
"learning_rate": 3.917769024970274e-05, |
|
"loss": 2.7408, |
|
"step": 116500 |
|
}, |
|
{ |
|
"epoch": 4.347502972651605, |
|
"grad_norm": 6.347229957580566, |
|
"learning_rate": 3.913124256837099e-05, |
|
"loss": 2.7517, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 4.366082045184305, |
|
"grad_norm": 7.695369243621826, |
|
"learning_rate": 3.908479488703924e-05, |
|
"loss": 2.7443, |
|
"step": 117500 |
|
}, |
|
{ |
|
"epoch": 4.384661117717004, |
|
"grad_norm": 6.612791538238525, |
|
"learning_rate": 3.903834720570749e-05, |
|
"loss": 2.7378, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 4.4032401902497025, |
|
"grad_norm": 6.125636577606201, |
|
"learning_rate": 3.899189952437575e-05, |
|
"loss": 2.7224, |
|
"step": 118500 |
|
}, |
|
{ |
|
"epoch": 4.421819262782402, |
|
"grad_norm": 6.215822696685791, |
|
"learning_rate": 3.894545184304399e-05, |
|
"loss": 2.7311, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 4.440398335315101, |
|
"grad_norm": 6.436295032501221, |
|
"learning_rate": 3.889900416171225e-05, |
|
"loss": 2.722, |
|
"step": 119500 |
|
}, |
|
{ |
|
"epoch": 4.4589774078478, |
|
"grad_norm": 6.271787166595459, |
|
"learning_rate": 3.88525564803805e-05, |
|
"loss": 2.7421, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 4.4775564803805, |
|
"grad_norm": 5.990880012512207, |
|
"learning_rate": 3.880610879904875e-05, |
|
"loss": 2.7284, |
|
"step": 120500 |
|
}, |
|
{ |
|
"epoch": 4.4961355529131986, |
|
"grad_norm": 6.567028999328613, |
|
"learning_rate": 3.875966111771701e-05, |
|
"loss": 2.7244, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 4.514714625445897, |
|
"grad_norm": 6.399959087371826, |
|
"learning_rate": 3.871321343638526e-05, |
|
"loss": 2.7139, |
|
"step": 121500 |
|
}, |
|
{ |
|
"epoch": 4.533293697978597, |
|
"grad_norm": 6.813540458679199, |
|
"learning_rate": 3.866676575505351e-05, |
|
"loss": 2.7177, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 4.551872770511296, |
|
"grad_norm": 6.263701438903809, |
|
"learning_rate": 3.862031807372176e-05, |
|
"loss": 2.7245, |
|
"step": 122500 |
|
}, |
|
{ |
|
"epoch": 4.570451843043995, |
|
"grad_norm": 6.281601428985596, |
|
"learning_rate": 3.8573870392390017e-05, |
|
"loss": 2.728, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 4.589030915576695, |
|
"grad_norm": 6.198410511016846, |
|
"learning_rate": 3.852742271105827e-05, |
|
"loss": 2.7187, |
|
"step": 123500 |
|
}, |
|
{ |
|
"epoch": 4.6076099881093935, |
|
"grad_norm": 7.052980899810791, |
|
"learning_rate": 3.848097502972652e-05, |
|
"loss": 2.7294, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 4.626189060642092, |
|
"grad_norm": 6.8353776931762695, |
|
"learning_rate": 3.8434527348394775e-05, |
|
"loss": 2.7366, |
|
"step": 124500 |
|
}, |
|
{ |
|
"epoch": 4.644768133174792, |
|
"grad_norm": 6.245896816253662, |
|
"learning_rate": 3.838807966706302e-05, |
|
"loss": 2.7061, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 4.663347205707491, |
|
"grad_norm": 5.742074489593506, |
|
"learning_rate": 3.8341631985731276e-05, |
|
"loss": 2.7031, |
|
"step": 125500 |
|
}, |
|
{ |
|
"epoch": 4.681926278240191, |
|
"grad_norm": 6.55544376373291, |
|
"learning_rate": 3.8295184304399526e-05, |
|
"loss": 2.6768, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 4.70050535077289, |
|
"grad_norm": 5.943970203399658, |
|
"learning_rate": 3.824873662306778e-05, |
|
"loss": 2.6986, |
|
"step": 126500 |
|
}, |
|
{ |
|
"epoch": 4.719084423305588, |
|
"grad_norm": 7.413682460784912, |
|
"learning_rate": 3.8202288941736034e-05, |
|
"loss": 2.7229, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 4.737663495838287, |
|
"grad_norm": 7.258702278137207, |
|
"learning_rate": 3.8155841260404285e-05, |
|
"loss": 2.7006, |
|
"step": 127500 |
|
}, |
|
{ |
|
"epoch": 4.756242568370987, |
|
"grad_norm": 6.239523887634277, |
|
"learning_rate": 3.8109393579072535e-05, |
|
"loss": 2.7015, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 4.774821640903686, |
|
"grad_norm": 6.031528949737549, |
|
"learning_rate": 3.8062945897740786e-05, |
|
"loss": 2.6794, |
|
"step": 128500 |
|
}, |
|
{ |
|
"epoch": 4.793400713436386, |
|
"grad_norm": 6.504217624664307, |
|
"learning_rate": 3.801649821640904e-05, |
|
"loss": 2.6756, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 4.8119797859690845, |
|
"grad_norm": 6.261529922485352, |
|
"learning_rate": 3.7970050535077287e-05, |
|
"loss": 2.6825, |
|
"step": 129500 |
|
}, |
|
{ |
|
"epoch": 4.830558858501783, |
|
"grad_norm": 5.9492292404174805, |
|
"learning_rate": 3.7923602853745544e-05, |
|
"loss": 2.6894, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 4.849137931034483, |
|
"grad_norm": 5.7504706382751465, |
|
"learning_rate": 3.7877155172413794e-05, |
|
"loss": 2.693, |
|
"step": 130500 |
|
}, |
|
{ |
|
"epoch": 4.867717003567182, |
|
"grad_norm": 6.545624256134033, |
|
"learning_rate": 3.7830707491082045e-05, |
|
"loss": 2.6888, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 4.886296076099881, |
|
"grad_norm": 6.274423599243164, |
|
"learning_rate": 3.77842598097503e-05, |
|
"loss": 2.6884, |
|
"step": 131500 |
|
}, |
|
{ |
|
"epoch": 4.904875148632581, |
|
"grad_norm": 5.632358074188232, |
|
"learning_rate": 3.773781212841855e-05, |
|
"loss": 2.678, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 4.923454221165279, |
|
"grad_norm": 6.883337020874023, |
|
"learning_rate": 3.76913644470868e-05, |
|
"loss": 2.677, |
|
"step": 132500 |
|
}, |
|
{ |
|
"epoch": 4.942033293697978, |
|
"grad_norm": 6.676144123077393, |
|
"learning_rate": 3.7644916765755054e-05, |
|
"loss": 2.6788, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 4.960612366230678, |
|
"grad_norm": 8.354021072387695, |
|
"learning_rate": 3.759846908442331e-05, |
|
"loss": 2.6885, |
|
"step": 133500 |
|
}, |
|
{ |
|
"epoch": 4.979191438763377, |
|
"grad_norm": 6.048637866973877, |
|
"learning_rate": 3.755202140309156e-05, |
|
"loss": 2.6636, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 4.997770511296076, |
|
"grad_norm": 5.709485054016113, |
|
"learning_rate": 3.750557372175981e-05, |
|
"loss": 2.6601, |
|
"step": 134500 |
|
}, |
|
{ |
|
"epoch": 5.0163495838287755, |
|
"grad_norm": 7.082876682281494, |
|
"learning_rate": 3.745912604042807e-05, |
|
"loss": 2.6477, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 5.034928656361474, |
|
"grad_norm": 6.736342430114746, |
|
"learning_rate": 3.741267835909631e-05, |
|
"loss": 2.6357, |
|
"step": 135500 |
|
}, |
|
{ |
|
"epoch": 5.053507728894173, |
|
"grad_norm": 6.7299580574035645, |
|
"learning_rate": 3.736623067776457e-05, |
|
"loss": 2.6532, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 5.072086801426873, |
|
"grad_norm": 6.488595008850098, |
|
"learning_rate": 3.731978299643282e-05, |
|
"loss": 2.6478, |
|
"step": 136500 |
|
}, |
|
{ |
|
"epoch": 5.090665873959572, |
|
"grad_norm": 6.1401262283325195, |
|
"learning_rate": 3.727333531510107e-05, |
|
"loss": 2.6271, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 5.109244946492271, |
|
"grad_norm": 6.6415300369262695, |
|
"learning_rate": 3.722688763376933e-05, |
|
"loss": 2.6347, |
|
"step": 137500 |
|
}, |
|
{ |
|
"epoch": 5.12782401902497, |
|
"grad_norm": 6.715450286865234, |
|
"learning_rate": 3.718043995243757e-05, |
|
"loss": 2.6377, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 5.146403091557669, |
|
"grad_norm": 6.399317741394043, |
|
"learning_rate": 3.713399227110583e-05, |
|
"loss": 2.6348, |
|
"step": 138500 |
|
}, |
|
{ |
|
"epoch": 5.164982164090369, |
|
"grad_norm": 7.233635902404785, |
|
"learning_rate": 3.708754458977408e-05, |
|
"loss": 2.6411, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 5.183561236623068, |
|
"grad_norm": 6.6088433265686035, |
|
"learning_rate": 3.704109690844233e-05, |
|
"loss": 2.62, |
|
"step": 139500 |
|
}, |
|
{ |
|
"epoch": 5.202140309155767, |
|
"grad_norm": 6.2975287437438965, |
|
"learning_rate": 3.699464922711058e-05, |
|
"loss": 2.6337, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 5.2207193816884665, |
|
"grad_norm": 5.73189115524292, |
|
"learning_rate": 3.694820154577884e-05, |
|
"loss": 2.6347, |
|
"step": 140500 |
|
}, |
|
{ |
|
"epoch": 5.239298454221165, |
|
"grad_norm": 6.38447904586792, |
|
"learning_rate": 3.690175386444709e-05, |
|
"loss": 2.6338, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 5.257877526753864, |
|
"grad_norm": 6.772334098815918, |
|
"learning_rate": 3.685530618311534e-05, |
|
"loss": 2.6371, |
|
"step": 141500 |
|
}, |
|
{ |
|
"epoch": 5.276456599286564, |
|
"grad_norm": 6.404881477355957, |
|
"learning_rate": 3.6808858501783596e-05, |
|
"loss": 2.6223, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 5.295035671819263, |
|
"grad_norm": 6.889057159423828, |
|
"learning_rate": 3.676241082045184e-05, |
|
"loss": 2.624, |
|
"step": 142500 |
|
}, |
|
{ |
|
"epoch": 5.313614744351962, |
|
"grad_norm": 6.435732841491699, |
|
"learning_rate": 3.67159631391201e-05, |
|
"loss": 2.6408, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 5.332193816884661, |
|
"grad_norm": 6.9687418937683105, |
|
"learning_rate": 3.666951545778835e-05, |
|
"loss": 2.6239, |
|
"step": 143500 |
|
}, |
|
{ |
|
"epoch": 5.35077288941736, |
|
"grad_norm": 6.787994861602783, |
|
"learning_rate": 3.66230677764566e-05, |
|
"loss": 2.6285, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 5.369351961950059, |
|
"grad_norm": 6.9550700187683105, |
|
"learning_rate": 3.6576620095124856e-05, |
|
"loss": 2.6173, |
|
"step": 144500 |
|
}, |
|
{ |
|
"epoch": 5.387931034482759, |
|
"grad_norm": 6.4159345626831055, |
|
"learning_rate": 3.6530172413793106e-05, |
|
"loss": 2.6186, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 5.406510107015458, |
|
"grad_norm": 6.8777995109558105, |
|
"learning_rate": 3.6483724732461357e-05, |
|
"loss": 2.6239, |
|
"step": 145500 |
|
}, |
|
{ |
|
"epoch": 5.425089179548157, |
|
"grad_norm": 6.115660667419434, |
|
"learning_rate": 3.643727705112961e-05, |
|
"loss": 2.6243, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 5.443668252080856, |
|
"grad_norm": 7.484089374542236, |
|
"learning_rate": 3.6390829369797864e-05, |
|
"loss": 2.6211, |
|
"step": 146500 |
|
}, |
|
{ |
|
"epoch": 5.462247324613555, |
|
"grad_norm": 6.411886692047119, |
|
"learning_rate": 3.6344381688466115e-05, |
|
"loss": 2.61, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 5.480826397146254, |
|
"grad_norm": 6.482817649841309, |
|
"learning_rate": 3.6297934007134365e-05, |
|
"loss": 2.5962, |
|
"step": 147500 |
|
}, |
|
{ |
|
"epoch": 5.499405469678954, |
|
"grad_norm": 5.861370086669922, |
|
"learning_rate": 3.625148632580262e-05, |
|
"loss": 2.6081, |
|
"step": 148000 |
|
}, |
|
{ |
|
"epoch": 5.517984542211653, |
|
"grad_norm": 7.179725170135498, |
|
"learning_rate": 3.6205038644470866e-05, |
|
"loss": 2.6138, |
|
"step": 148500 |
|
}, |
|
{ |
|
"epoch": 5.536563614744352, |
|
"grad_norm": 6.607731819152832, |
|
"learning_rate": 3.6158590963139124e-05, |
|
"loss": 2.6335, |
|
"step": 149000 |
|
}, |
|
{ |
|
"epoch": 5.555142687277051, |
|
"grad_norm": 7.58914041519165, |
|
"learning_rate": 3.6112143281807374e-05, |
|
"loss": 2.6188, |
|
"step": 149500 |
|
}, |
|
{ |
|
"epoch": 5.57372175980975, |
|
"grad_norm": 6.815672397613525, |
|
"learning_rate": 3.6065695600475625e-05, |
|
"loss": 2.5999, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 5.592300832342449, |
|
"grad_norm": 7.304187297821045, |
|
"learning_rate": 3.6019247919143875e-05, |
|
"loss": 2.61, |
|
"step": 150500 |
|
}, |
|
{ |
|
"epoch": 5.610879904875149, |
|
"grad_norm": 6.256832599639893, |
|
"learning_rate": 3.597280023781213e-05, |
|
"loss": 2.5896, |
|
"step": 151000 |
|
}, |
|
{ |
|
"epoch": 5.629458977407848, |
|
"grad_norm": 6.603561878204346, |
|
"learning_rate": 3.592635255648038e-05, |
|
"loss": 2.5937, |
|
"step": 151500 |
|
}, |
|
{ |
|
"epoch": 5.648038049940547, |
|
"grad_norm": 6.757023334503174, |
|
"learning_rate": 3.587990487514863e-05, |
|
"loss": 2.6102, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 5.666617122473246, |
|
"grad_norm": 6.520168304443359, |
|
"learning_rate": 3.583345719381689e-05, |
|
"loss": 2.5915, |
|
"step": 152500 |
|
}, |
|
{ |
|
"epoch": 5.685196195005945, |
|
"grad_norm": 6.486233234405518, |
|
"learning_rate": 3.5787009512485134e-05, |
|
"loss": 2.5833, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 5.703775267538645, |
|
"grad_norm": 5.79095458984375, |
|
"learning_rate": 3.574056183115339e-05, |
|
"loss": 2.5862, |
|
"step": 153500 |
|
}, |
|
{ |
|
"epoch": 5.722354340071344, |
|
"grad_norm": 6.390963077545166, |
|
"learning_rate": 3.569411414982164e-05, |
|
"loss": 2.5867, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 5.7409334126040426, |
|
"grad_norm": 6.4793548583984375, |
|
"learning_rate": 3.564766646848989e-05, |
|
"loss": 2.5983, |
|
"step": 154500 |
|
}, |
|
{ |
|
"epoch": 5.759512485136742, |
|
"grad_norm": 6.3781585693359375, |
|
"learning_rate": 3.560121878715815e-05, |
|
"loss": 2.6013, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 5.778091557669441, |
|
"grad_norm": 6.004998207092285, |
|
"learning_rate": 3.55547711058264e-05, |
|
"loss": 2.5908, |
|
"step": 155500 |
|
}, |
|
{ |
|
"epoch": 5.79667063020214, |
|
"grad_norm": 7.406827926635742, |
|
"learning_rate": 3.550832342449465e-05, |
|
"loss": 2.6126, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 5.81524970273484, |
|
"grad_norm": 6.263004302978516, |
|
"learning_rate": 3.54618757431629e-05, |
|
"loss": 2.5957, |
|
"step": 156500 |
|
}, |
|
{ |
|
"epoch": 5.833828775267539, |
|
"grad_norm": 6.236379623413086, |
|
"learning_rate": 3.541542806183116e-05, |
|
"loss": 2.5819, |
|
"step": 157000 |
|
}, |
|
{ |
|
"epoch": 5.8524078478002375, |
|
"grad_norm": 7.537994861602783, |
|
"learning_rate": 3.536898038049941e-05, |
|
"loss": 2.574, |
|
"step": 157500 |
|
}, |
|
{ |
|
"epoch": 5.870986920332937, |
|
"grad_norm": 5.823127269744873, |
|
"learning_rate": 3.532253269916766e-05, |
|
"loss": 2.5702, |
|
"step": 158000 |
|
}, |
|
{ |
|
"epoch": 5.889565992865636, |
|
"grad_norm": 5.820526123046875, |
|
"learning_rate": 3.527608501783592e-05, |
|
"loss": 2.5799, |
|
"step": 158500 |
|
}, |
|
{ |
|
"epoch": 5.908145065398335, |
|
"grad_norm": 6.082313060760498, |
|
"learning_rate": 3.522963733650416e-05, |
|
"loss": 2.5835, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 5.926724137931035, |
|
"grad_norm": 6.335425853729248, |
|
"learning_rate": 3.518318965517242e-05, |
|
"loss": 2.5823, |
|
"step": 159500 |
|
}, |
|
{ |
|
"epoch": 5.945303210463734, |
|
"grad_norm": 6.930739879608154, |
|
"learning_rate": 3.513674197384067e-05, |
|
"loss": 2.5829, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 5.963882282996433, |
|
"grad_norm": 6.215325832366943, |
|
"learning_rate": 3.509029429250892e-05, |
|
"loss": 2.5851, |
|
"step": 160500 |
|
}, |
|
{ |
|
"epoch": 5.982461355529132, |
|
"grad_norm": 5.954530239105225, |
|
"learning_rate": 3.504384661117717e-05, |
|
"loss": 2.5843, |
|
"step": 161000 |
|
}, |
|
{ |
|
"epoch": 6.001040428061831, |
|
"grad_norm": 6.572493076324463, |
|
"learning_rate": 3.4997398929845426e-05, |
|
"loss": 2.5786, |
|
"step": 161500 |
|
}, |
|
{ |
|
"epoch": 6.01961950059453, |
|
"grad_norm": 6.725315093994141, |
|
"learning_rate": 3.495095124851368e-05, |
|
"loss": 2.5384, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 6.03819857312723, |
|
"grad_norm": 6.982800483703613, |
|
"learning_rate": 3.490450356718193e-05, |
|
"loss": 2.5501, |
|
"step": 162500 |
|
}, |
|
{ |
|
"epoch": 6.0567776456599285, |
|
"grad_norm": 6.240696430206299, |
|
"learning_rate": 3.4858055885850185e-05, |
|
"loss": 2.5446, |
|
"step": 163000 |
|
}, |
|
{ |
|
"epoch": 6.075356718192628, |
|
"grad_norm": 6.301703453063965, |
|
"learning_rate": 3.481160820451843e-05, |
|
"loss": 2.5471, |
|
"step": 163500 |
|
}, |
|
{ |
|
"epoch": 6.093935790725327, |
|
"grad_norm": 7.478944778442383, |
|
"learning_rate": 3.4765160523186686e-05, |
|
"loss": 2.5473, |
|
"step": 164000 |
|
}, |
|
{ |
|
"epoch": 6.112514863258026, |
|
"grad_norm": 6.435521125793457, |
|
"learning_rate": 3.4718712841854936e-05, |
|
"loss": 2.5417, |
|
"step": 164500 |
|
}, |
|
{ |
|
"epoch": 6.131093935790726, |
|
"grad_norm": 7.630947589874268, |
|
"learning_rate": 3.467226516052319e-05, |
|
"loss": 2.5365, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 6.149673008323425, |
|
"grad_norm": 7.021152973175049, |
|
"learning_rate": 3.4625817479191444e-05, |
|
"loss": 2.5493, |
|
"step": 165500 |
|
}, |
|
{ |
|
"epoch": 6.168252080856123, |
|
"grad_norm": 6.182572841644287, |
|
"learning_rate": 3.457936979785969e-05, |
|
"loss": 2.5435, |
|
"step": 166000 |
|
}, |
|
{ |
|
"epoch": 6.186831153388823, |
|
"grad_norm": 7.1868767738342285, |
|
"learning_rate": 3.4532922116527945e-05, |
|
"loss": 2.5461, |
|
"step": 166500 |
|
}, |
|
{ |
|
"epoch": 6.205410225921522, |
|
"grad_norm": 6.992275714874268, |
|
"learning_rate": 3.4486474435196195e-05, |
|
"loss": 2.5243, |
|
"step": 167000 |
|
}, |
|
{ |
|
"epoch": 6.223989298454221, |
|
"grad_norm": 6.819701194763184, |
|
"learning_rate": 3.4440026753864446e-05, |
|
"loss": 2.533, |
|
"step": 167500 |
|
}, |
|
{ |
|
"epoch": 6.242568370986921, |
|
"grad_norm": 7.018156051635742, |
|
"learning_rate": 3.43935790725327e-05, |
|
"loss": 2.5373, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 6.2611474435196195, |
|
"grad_norm": 6.9675188064575195, |
|
"learning_rate": 3.4347131391200954e-05, |
|
"loss": 2.5354, |
|
"step": 168500 |
|
}, |
|
{ |
|
"epoch": 6.279726516052318, |
|
"grad_norm": 6.449595928192139, |
|
"learning_rate": 3.4300683709869204e-05, |
|
"loss": 2.5198, |
|
"step": 169000 |
|
}, |
|
{ |
|
"epoch": 6.298305588585018, |
|
"grad_norm": 6.839005470275879, |
|
"learning_rate": 3.4254236028537455e-05, |
|
"loss": 2.529, |
|
"step": 169500 |
|
}, |
|
{ |
|
"epoch": 6.316884661117717, |
|
"grad_norm": 6.704039096832275, |
|
"learning_rate": 3.420778834720571e-05, |
|
"loss": 2.5365, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 6.335463733650416, |
|
"grad_norm": 6.419273853302002, |
|
"learning_rate": 3.4161340665873956e-05, |
|
"loss": 2.5441, |
|
"step": 170500 |
|
}, |
|
{ |
|
"epoch": 6.354042806183116, |
|
"grad_norm": 7.052849292755127, |
|
"learning_rate": 3.411489298454221e-05, |
|
"loss": 2.5277, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 6.372621878715814, |
|
"grad_norm": 7.161109447479248, |
|
"learning_rate": 3.4068445303210463e-05, |
|
"loss": 2.522, |
|
"step": 171500 |
|
}, |
|
{ |
|
"epoch": 6.391200951248513, |
|
"grad_norm": 6.348012447357178, |
|
"learning_rate": 3.4021997621878714e-05, |
|
"loss": 2.524, |
|
"step": 172000 |
|
}, |
|
{ |
|
"epoch": 6.409780023781213, |
|
"grad_norm": 6.336347579956055, |
|
"learning_rate": 3.397554994054697e-05, |
|
"loss": 2.5222, |
|
"step": 172500 |
|
}, |
|
{ |
|
"epoch": 6.428359096313912, |
|
"grad_norm": 5.670961380004883, |
|
"learning_rate": 3.392910225921522e-05, |
|
"loss": 2.5421, |
|
"step": 173000 |
|
}, |
|
{ |
|
"epoch": 6.446938168846611, |
|
"grad_norm": 6.4542717933654785, |
|
"learning_rate": 3.388265457788347e-05, |
|
"loss": 2.5347, |
|
"step": 173500 |
|
}, |
|
{ |
|
"epoch": 6.4655172413793105, |
|
"grad_norm": 6.445559024810791, |
|
"learning_rate": 3.383620689655172e-05, |
|
"loss": 2.5161, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 6.484096313912009, |
|
"grad_norm": 6.366390228271484, |
|
"learning_rate": 3.378975921521998e-05, |
|
"loss": 2.5254, |
|
"step": 174500 |
|
}, |
|
{ |
|
"epoch": 6.502675386444709, |
|
"grad_norm": 5.990453720092773, |
|
"learning_rate": 3.374331153388823e-05, |
|
"loss": 2.5291, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 6.521254458977408, |
|
"grad_norm": 7.384641170501709, |
|
"learning_rate": 3.369686385255648e-05, |
|
"loss": 2.5273, |
|
"step": 175500 |
|
}, |
|
{ |
|
"epoch": 6.539833531510107, |
|
"grad_norm": 7.899537563323975, |
|
"learning_rate": 3.365041617122474e-05, |
|
"loss": 2.5217, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 6.558412604042807, |
|
"grad_norm": 7.456251621246338, |
|
"learning_rate": 3.360396848989298e-05, |
|
"loss": 2.5183, |
|
"step": 176500 |
|
}, |
|
{ |
|
"epoch": 6.576991676575505, |
|
"grad_norm": 7.728662967681885, |
|
"learning_rate": 3.355752080856124e-05, |
|
"loss": 2.5242, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 6.595570749108204, |
|
"grad_norm": 6.4795708656311035, |
|
"learning_rate": 3.351107312722949e-05, |
|
"loss": 2.5148, |
|
"step": 177500 |
|
}, |
|
{ |
|
"epoch": 6.614149821640904, |
|
"grad_norm": 6.569213390350342, |
|
"learning_rate": 3.346462544589774e-05, |
|
"loss": 2.513, |
|
"step": 178000 |
|
}, |
|
{ |
|
"epoch": 6.632728894173603, |
|
"grad_norm": 5.95412015914917, |
|
"learning_rate": 3.3418177764566e-05, |
|
"loss": 2.5141, |
|
"step": 178500 |
|
}, |
|
{ |
|
"epoch": 6.651307966706302, |
|
"grad_norm": 6.39993143081665, |
|
"learning_rate": 3.337173008323425e-05, |
|
"loss": 2.5209, |
|
"step": 179000 |
|
}, |
|
{ |
|
"epoch": 6.6698870392390015, |
|
"grad_norm": 6.558811664581299, |
|
"learning_rate": 3.33252824019025e-05, |
|
"loss": 2.5168, |
|
"step": 179500 |
|
}, |
|
{ |
|
"epoch": 6.6884661117717, |
|
"grad_norm": 6.443490982055664, |
|
"learning_rate": 3.327883472057075e-05, |
|
"loss": 2.5122, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 6.707045184304399, |
|
"grad_norm": 6.475789546966553, |
|
"learning_rate": 3.3232387039239006e-05, |
|
"loss": 2.5265, |
|
"step": 180500 |
|
}, |
|
{ |
|
"epoch": 6.725624256837099, |
|
"grad_norm": 6.097142219543457, |
|
"learning_rate": 3.318593935790725e-05, |
|
"loss": 2.4953, |
|
"step": 181000 |
|
}, |
|
{ |
|
"epoch": 6.744203329369798, |
|
"grad_norm": 6.5415849685668945, |
|
"learning_rate": 3.313949167657551e-05, |
|
"loss": 2.504, |
|
"step": 181500 |
|
}, |
|
{ |
|
"epoch": 6.762782401902497, |
|
"grad_norm": 6.81630277633667, |
|
"learning_rate": 3.309304399524376e-05, |
|
"loss": 2.515, |
|
"step": 182000 |
|
}, |
|
{ |
|
"epoch": 6.781361474435196, |
|
"grad_norm": 6.129889488220215, |
|
"learning_rate": 3.304659631391201e-05, |
|
"loss": 2.4993, |
|
"step": 182500 |
|
}, |
|
{ |
|
"epoch": 6.799940546967895, |
|
"grad_norm": 6.960236072540283, |
|
"learning_rate": 3.3000148632580265e-05, |
|
"loss": 2.5146, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 6.818519619500594, |
|
"grad_norm": 7.540809154510498, |
|
"learning_rate": 3.2953700951248516e-05, |
|
"loss": 2.4899, |
|
"step": 183500 |
|
}, |
|
{ |
|
"epoch": 6.837098692033294, |
|
"grad_norm": 6.699360370635986, |
|
"learning_rate": 3.2907253269916766e-05, |
|
"loss": 2.5032, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 6.855677764565993, |
|
"grad_norm": 6.967233180999756, |
|
"learning_rate": 3.286080558858502e-05, |
|
"loss": 2.5079, |
|
"step": 184500 |
|
}, |
|
{ |
|
"epoch": 6.874256837098692, |
|
"grad_norm": 6.475770473480225, |
|
"learning_rate": 3.2814357907253274e-05, |
|
"loss": 2.5278, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 6.892835909631391, |
|
"grad_norm": 7.317842483520508, |
|
"learning_rate": 3.2767910225921525e-05, |
|
"loss": 2.4923, |
|
"step": 185500 |
|
}, |
|
{ |
|
"epoch": 6.91141498216409, |
|
"grad_norm": 6.920095443725586, |
|
"learning_rate": 3.2721462544589775e-05, |
|
"loss": 2.5094, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 6.92999405469679, |
|
"grad_norm": 6.825646877288818, |
|
"learning_rate": 3.267501486325803e-05, |
|
"loss": 2.4832, |
|
"step": 186500 |
|
}, |
|
{ |
|
"epoch": 6.948573127229489, |
|
"grad_norm": 7.122133255004883, |
|
"learning_rate": 3.2628567181926276e-05, |
|
"loss": 2.5033, |
|
"step": 187000 |
|
}, |
|
{ |
|
"epoch": 6.967152199762188, |
|
"grad_norm": 6.76582145690918, |
|
"learning_rate": 3.2582119500594533e-05, |
|
"loss": 2.4841, |
|
"step": 187500 |
|
}, |
|
{ |
|
"epoch": 6.9857312722948866, |
|
"grad_norm": 6.667826175689697, |
|
"learning_rate": 3.2535671819262784e-05, |
|
"loss": 2.5107, |
|
"step": 188000 |
|
}, |
|
{ |
|
"epoch": 7.004310344827586, |
|
"grad_norm": 6.997557640075684, |
|
"learning_rate": 3.2489224137931034e-05, |
|
"loss": 2.4932, |
|
"step": 188500 |
|
}, |
|
{ |
|
"epoch": 7.022889417360285, |
|
"grad_norm": 7.383888244628906, |
|
"learning_rate": 3.244277645659929e-05, |
|
"loss": 2.4599, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 7.041468489892985, |
|
"grad_norm": 6.721455097198486, |
|
"learning_rate": 3.239632877526754e-05, |
|
"loss": 2.461, |
|
"step": 189500 |
|
}, |
|
{ |
|
"epoch": 7.060047562425684, |
|
"grad_norm": 5.605399131774902, |
|
"learning_rate": 3.234988109393579e-05, |
|
"loss": 2.4733, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 7.078626634958383, |
|
"grad_norm": 6.753121852874756, |
|
"learning_rate": 3.230343341260404e-05, |
|
"loss": 2.4739, |
|
"step": 190500 |
|
}, |
|
{ |
|
"epoch": 7.097205707491082, |
|
"grad_norm": 6.254253387451172, |
|
"learning_rate": 3.22569857312723e-05, |
|
"loss": 2.4702, |
|
"step": 191000 |
|
}, |
|
{ |
|
"epoch": 7.115784780023781, |
|
"grad_norm": 7.067044258117676, |
|
"learning_rate": 3.2210538049940544e-05, |
|
"loss": 2.4565, |
|
"step": 191500 |
|
}, |
|
{ |
|
"epoch": 7.13436385255648, |
|
"grad_norm": 6.651601791381836, |
|
"learning_rate": 3.21640903686088e-05, |
|
"loss": 2.4591, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 7.15294292508918, |
|
"grad_norm": 7.131402015686035, |
|
"learning_rate": 3.211764268727705e-05, |
|
"loss": 2.4699, |
|
"step": 192500 |
|
}, |
|
{ |
|
"epoch": 7.171521997621879, |
|
"grad_norm": 6.57224702835083, |
|
"learning_rate": 3.20711950059453e-05, |
|
"loss": 2.4652, |
|
"step": 193000 |
|
}, |
|
{ |
|
"epoch": 7.190101070154578, |
|
"grad_norm": 6.226948261260986, |
|
"learning_rate": 3.202474732461356e-05, |
|
"loss": 2.4553, |
|
"step": 193500 |
|
}, |
|
{ |
|
"epoch": 7.208680142687277, |
|
"grad_norm": 6.283173561096191, |
|
"learning_rate": 3.197829964328181e-05, |
|
"loss": 2.4666, |
|
"step": 194000 |
|
}, |
|
{ |
|
"epoch": 7.227259215219976, |
|
"grad_norm": 6.692994117736816, |
|
"learning_rate": 3.193185196195006e-05, |
|
"loss": 2.4634, |
|
"step": 194500 |
|
}, |
|
{ |
|
"epoch": 7.245838287752675, |
|
"grad_norm": 5.542157173156738, |
|
"learning_rate": 3.188540428061831e-05, |
|
"loss": 2.4483, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 7.264417360285375, |
|
"grad_norm": 7.492745876312256, |
|
"learning_rate": 3.183895659928656e-05, |
|
"loss": 2.467, |
|
"step": 195500 |
|
}, |
|
{ |
|
"epoch": 7.282996432818074, |
|
"grad_norm": 6.997331619262695, |
|
"learning_rate": 3.179250891795482e-05, |
|
"loss": 2.4562, |
|
"step": 196000 |
|
}, |
|
{ |
|
"epoch": 7.3015755053507725, |
|
"grad_norm": 7.160475730895996, |
|
"learning_rate": 3.174606123662307e-05, |
|
"loss": 2.4645, |
|
"step": 196500 |
|
}, |
|
{ |
|
"epoch": 7.320154577883472, |
|
"grad_norm": 6.583847522735596, |
|
"learning_rate": 3.169961355529132e-05, |
|
"loss": 2.4591, |
|
"step": 197000 |
|
}, |
|
{ |
|
"epoch": 7.338733650416171, |
|
"grad_norm": 7.247707366943359, |
|
"learning_rate": 3.165316587395957e-05, |
|
"loss": 2.447, |
|
"step": 197500 |
|
}, |
|
{ |
|
"epoch": 7.357312722948871, |
|
"grad_norm": 6.818671226501465, |
|
"learning_rate": 3.160671819262783e-05, |
|
"loss": 2.4524, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 7.37589179548157, |
|
"grad_norm": 6.533426284790039, |
|
"learning_rate": 3.156027051129608e-05, |
|
"loss": 2.4614, |
|
"step": 198500 |
|
}, |
|
{ |
|
"epoch": 7.394470868014269, |
|
"grad_norm": 6.117506504058838, |
|
"learning_rate": 3.151382282996433e-05, |
|
"loss": 2.4523, |
|
"step": 199000 |
|
}, |
|
{ |
|
"epoch": 7.413049940546968, |
|
"grad_norm": 6.545726776123047, |
|
"learning_rate": 3.1467375148632586e-05, |
|
"loss": 2.4413, |
|
"step": 199500 |
|
}, |
|
{ |
|
"epoch": 7.431629013079667, |
|
"grad_norm": 6.267510414123535, |
|
"learning_rate": 3.142092746730083e-05, |
|
"loss": 2.4626, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 7.450208085612366, |
|
"grad_norm": 6.45424222946167, |
|
"learning_rate": 3.137447978596909e-05, |
|
"loss": 2.4525, |
|
"step": 200500 |
|
}, |
|
{ |
|
"epoch": 7.468787158145066, |
|
"grad_norm": 6.826467990875244, |
|
"learning_rate": 3.132803210463734e-05, |
|
"loss": 2.4535, |
|
"step": 201000 |
|
}, |
|
{ |
|
"epoch": 7.487366230677765, |
|
"grad_norm": 6.419857501983643, |
|
"learning_rate": 3.128158442330559e-05, |
|
"loss": 2.4567, |
|
"step": 201500 |
|
}, |
|
{ |
|
"epoch": 7.5059453032104635, |
|
"grad_norm": 7.912742614746094, |
|
"learning_rate": 3.123513674197384e-05, |
|
"loss": 2.4352, |
|
"step": 202000 |
|
}, |
|
{ |
|
"epoch": 7.524524375743163, |
|
"grad_norm": 6.15361213684082, |
|
"learning_rate": 3.1188689060642096e-05, |
|
"loss": 2.4525, |
|
"step": 202500 |
|
}, |
|
{ |
|
"epoch": 7.543103448275862, |
|
"grad_norm": 6.077796936035156, |
|
"learning_rate": 3.1142241379310346e-05, |
|
"loss": 2.4621, |
|
"step": 203000 |
|
}, |
|
{ |
|
"epoch": 7.561682520808561, |
|
"grad_norm": 6.890556335449219, |
|
"learning_rate": 3.10957936979786e-05, |
|
"loss": 2.4454, |
|
"step": 203500 |
|
}, |
|
{ |
|
"epoch": 7.580261593341261, |
|
"grad_norm": 7.002103328704834, |
|
"learning_rate": 3.1049346016646854e-05, |
|
"loss": 2.4737, |
|
"step": 204000 |
|
}, |
|
{ |
|
"epoch": 7.59884066587396, |
|
"grad_norm": 7.24050760269165, |
|
"learning_rate": 3.10028983353151e-05, |
|
"loss": 2.4369, |
|
"step": 204500 |
|
}, |
|
{ |
|
"epoch": 7.617419738406658, |
|
"grad_norm": 7.357000827789307, |
|
"learning_rate": 3.0956450653983355e-05, |
|
"loss": 2.4338, |
|
"step": 205000 |
|
}, |
|
{ |
|
"epoch": 7.635998810939358, |
|
"grad_norm": 6.06101131439209, |
|
"learning_rate": 3.0910002972651605e-05, |
|
"loss": 2.4378, |
|
"step": 205500 |
|
}, |
|
{ |
|
"epoch": 7.654577883472057, |
|
"grad_norm": 7.14568567276001, |
|
"learning_rate": 3.0863555291319856e-05, |
|
"loss": 2.4448, |
|
"step": 206000 |
|
}, |
|
{ |
|
"epoch": 7.673156956004756, |
|
"grad_norm": 6.747462272644043, |
|
"learning_rate": 3.081710760998811e-05, |
|
"loss": 2.4604, |
|
"step": 206500 |
|
}, |
|
{ |
|
"epoch": 7.691736028537456, |
|
"grad_norm": 7.445852756500244, |
|
"learning_rate": 3.0770659928656364e-05, |
|
"loss": 2.4454, |
|
"step": 207000 |
|
}, |
|
{ |
|
"epoch": 7.7103151010701545, |
|
"grad_norm": 6.196556568145752, |
|
"learning_rate": 3.0724212247324614e-05, |
|
"loss": 2.4378, |
|
"step": 207500 |
|
}, |
|
{ |
|
"epoch": 7.728894173602853, |
|
"grad_norm": 6.7122039794921875, |
|
"learning_rate": 3.0677764565992865e-05, |
|
"loss": 2.4286, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 7.747473246135553, |
|
"grad_norm": 7.239169120788574, |
|
"learning_rate": 3.063131688466112e-05, |
|
"loss": 2.4459, |
|
"step": 208500 |
|
}, |
|
{ |
|
"epoch": 7.766052318668252, |
|
"grad_norm": 5.942273139953613, |
|
"learning_rate": 3.058486920332937e-05, |
|
"loss": 2.4554, |
|
"step": 209000 |
|
}, |
|
{ |
|
"epoch": 7.784631391200952, |
|
"grad_norm": 6.494337558746338, |
|
"learning_rate": 3.053842152199762e-05, |
|
"loss": 2.4404, |
|
"step": 209500 |
|
}, |
|
{ |
|
"epoch": 7.803210463733651, |
|
"grad_norm": 7.0354084968566895, |
|
"learning_rate": 3.0491973840665877e-05, |
|
"loss": 2.4454, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 7.821789536266349, |
|
"grad_norm": 6.828258037567139, |
|
"learning_rate": 3.0445526159334127e-05, |
|
"loss": 2.4325, |
|
"step": 210500 |
|
}, |
|
{ |
|
"epoch": 7.840368608799048, |
|
"grad_norm": 7.0825724601745605, |
|
"learning_rate": 3.039907847800238e-05, |
|
"loss": 2.4289, |
|
"step": 211000 |
|
}, |
|
{ |
|
"epoch": 7.858947681331748, |
|
"grad_norm": 7.346935749053955, |
|
"learning_rate": 3.0352630796670635e-05, |
|
"loss": 2.4548, |
|
"step": 211500 |
|
}, |
|
{ |
|
"epoch": 7.877526753864447, |
|
"grad_norm": 6.80324125289917, |
|
"learning_rate": 3.0306183115338882e-05, |
|
"loss": 2.4507, |
|
"step": 212000 |
|
}, |
|
{ |
|
"epoch": 7.896105826397147, |
|
"grad_norm": 6.606076717376709, |
|
"learning_rate": 3.0259735434007136e-05, |
|
"loss": 2.43, |
|
"step": 212500 |
|
}, |
|
{ |
|
"epoch": 7.9146848989298455, |
|
"grad_norm": 7.006173133850098, |
|
"learning_rate": 3.021328775267539e-05, |
|
"loss": 2.4376, |
|
"step": 213000 |
|
}, |
|
{ |
|
"epoch": 7.933263971462544, |
|
"grad_norm": 6.683685779571533, |
|
"learning_rate": 3.0166840071343637e-05, |
|
"loss": 2.4173, |
|
"step": 213500 |
|
}, |
|
{ |
|
"epoch": 7.951843043995244, |
|
"grad_norm": 7.217254161834717, |
|
"learning_rate": 3.012039239001189e-05, |
|
"loss": 2.4465, |
|
"step": 214000 |
|
}, |
|
{ |
|
"epoch": 7.970422116527943, |
|
"grad_norm": 6.2831573486328125, |
|
"learning_rate": 3.0073944708680145e-05, |
|
"loss": 2.4347, |
|
"step": 214500 |
|
}, |
|
{ |
|
"epoch": 7.989001189060642, |
|
"grad_norm": 7.447052955627441, |
|
"learning_rate": 3.0027497027348395e-05, |
|
"loss": 2.4346, |
|
"step": 215000 |
|
}, |
|
{ |
|
"epoch": 8.007580261593342, |
|
"grad_norm": 6.3113884925842285, |
|
"learning_rate": 2.998104934601665e-05, |
|
"loss": 2.4264, |
|
"step": 215500 |
|
}, |
|
{ |
|
"epoch": 8.02615933412604, |
|
"grad_norm": 7.349926948547363, |
|
"learning_rate": 2.9934601664684903e-05, |
|
"loss": 2.409, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 8.04473840665874, |
|
"grad_norm": 7.6226959228515625, |
|
"learning_rate": 2.988815398335315e-05, |
|
"loss": 2.4047, |
|
"step": 216500 |
|
}, |
|
{ |
|
"epoch": 8.063317479191438, |
|
"grad_norm": 7.297638893127441, |
|
"learning_rate": 2.9841706302021404e-05, |
|
"loss": 2.4064, |
|
"step": 217000 |
|
}, |
|
{ |
|
"epoch": 8.081896551724139, |
|
"grad_norm": 6.703174114227295, |
|
"learning_rate": 2.9795258620689658e-05, |
|
"loss": 2.3948, |
|
"step": 217500 |
|
}, |
|
{ |
|
"epoch": 8.100475624256838, |
|
"grad_norm": 7.86271858215332, |
|
"learning_rate": 2.974881093935791e-05, |
|
"loss": 2.4033, |
|
"step": 218000 |
|
}, |
|
{ |
|
"epoch": 8.119054696789537, |
|
"grad_norm": 6.666792392730713, |
|
"learning_rate": 2.9702363258026162e-05, |
|
"loss": 2.4189, |
|
"step": 218500 |
|
}, |
|
{ |
|
"epoch": 8.137633769322235, |
|
"grad_norm": 7.112173557281494, |
|
"learning_rate": 2.9655915576694416e-05, |
|
"loss": 2.4139, |
|
"step": 219000 |
|
}, |
|
{ |
|
"epoch": 8.156212841854934, |
|
"grad_norm": 7.117358684539795, |
|
"learning_rate": 2.9609467895362663e-05, |
|
"loss": 2.4092, |
|
"step": 219500 |
|
}, |
|
{ |
|
"epoch": 8.174791914387633, |
|
"grad_norm": 5.946983337402344, |
|
"learning_rate": 2.9563020214030917e-05, |
|
"loss": 2.403, |
|
"step": 220000 |
|
}, |
|
{ |
|
"epoch": 8.193370986920334, |
|
"grad_norm": 6.8523030281066895, |
|
"learning_rate": 2.951657253269917e-05, |
|
"loss": 2.3906, |
|
"step": 220500 |
|
}, |
|
{ |
|
"epoch": 8.211950059453033, |
|
"grad_norm": 6.419975280761719, |
|
"learning_rate": 2.947012485136742e-05, |
|
"loss": 2.4058, |
|
"step": 221000 |
|
}, |
|
{ |
|
"epoch": 8.230529131985731, |
|
"grad_norm": 7.008522033691406, |
|
"learning_rate": 2.9423677170035675e-05, |
|
"loss": 2.3949, |
|
"step": 221500 |
|
}, |
|
{ |
|
"epoch": 8.24910820451843, |
|
"grad_norm": 6.398033618927002, |
|
"learning_rate": 2.937722948870393e-05, |
|
"loss": 2.3894, |
|
"step": 222000 |
|
}, |
|
{ |
|
"epoch": 8.26768727705113, |
|
"grad_norm": 6.911588668823242, |
|
"learning_rate": 2.9330781807372176e-05, |
|
"loss": 2.4026, |
|
"step": 222500 |
|
}, |
|
{ |
|
"epoch": 8.286266349583828, |
|
"grad_norm": 6.391911029815674, |
|
"learning_rate": 2.928433412604043e-05, |
|
"loss": 2.388, |
|
"step": 223000 |
|
}, |
|
{ |
|
"epoch": 8.304845422116529, |
|
"grad_norm": 6.878973484039307, |
|
"learning_rate": 2.9237886444708684e-05, |
|
"loss": 2.3964, |
|
"step": 223500 |
|
}, |
|
{ |
|
"epoch": 8.323424494649228, |
|
"grad_norm": 6.681369781494141, |
|
"learning_rate": 2.919143876337693e-05, |
|
"loss": 2.4049, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 8.342003567181926, |
|
"grad_norm": 6.652570724487305, |
|
"learning_rate": 2.9144991082045185e-05, |
|
"loss": 2.3935, |
|
"step": 224500 |
|
}, |
|
{ |
|
"epoch": 8.360582639714625, |
|
"grad_norm": 6.757369041442871, |
|
"learning_rate": 2.909854340071344e-05, |
|
"loss": 2.4024, |
|
"step": 225000 |
|
}, |
|
{ |
|
"epoch": 8.379161712247324, |
|
"grad_norm": 7.730061054229736, |
|
"learning_rate": 2.905209571938169e-05, |
|
"loss": 2.3815, |
|
"step": 225500 |
|
}, |
|
{ |
|
"epoch": 8.397740784780023, |
|
"grad_norm": 7.044541358947754, |
|
"learning_rate": 2.9005648038049943e-05, |
|
"loss": 2.3872, |
|
"step": 226000 |
|
}, |
|
{ |
|
"epoch": 8.416319857312724, |
|
"grad_norm": 7.104819297790527, |
|
"learning_rate": 2.895920035671819e-05, |
|
"loss": 2.3888, |
|
"step": 226500 |
|
}, |
|
{ |
|
"epoch": 8.434898929845422, |
|
"grad_norm": 6.207997798919678, |
|
"learning_rate": 2.8912752675386444e-05, |
|
"loss": 2.3901, |
|
"step": 227000 |
|
}, |
|
{ |
|
"epoch": 8.453478002378121, |
|
"grad_norm": 6.412841796875, |
|
"learning_rate": 2.8866304994054698e-05, |
|
"loss": 2.404, |
|
"step": 227500 |
|
}, |
|
{ |
|
"epoch": 8.47205707491082, |
|
"grad_norm": 7.31563663482666, |
|
"learning_rate": 2.881985731272295e-05, |
|
"loss": 2.3952, |
|
"step": 228000 |
|
}, |
|
{ |
|
"epoch": 8.490636147443519, |
|
"grad_norm": 6.783107757568359, |
|
"learning_rate": 2.8773409631391203e-05, |
|
"loss": 2.369, |
|
"step": 228500 |
|
}, |
|
{ |
|
"epoch": 8.50921521997622, |
|
"grad_norm": 7.456410884857178, |
|
"learning_rate": 2.8726961950059456e-05, |
|
"loss": 2.3955, |
|
"step": 229000 |
|
}, |
|
{ |
|
"epoch": 8.527794292508919, |
|
"grad_norm": 6.817208766937256, |
|
"learning_rate": 2.8680514268727704e-05, |
|
"loss": 2.3777, |
|
"step": 229500 |
|
}, |
|
{ |
|
"epoch": 8.546373365041617, |
|
"grad_norm": 6.829710483551025, |
|
"learning_rate": 2.8634066587395957e-05, |
|
"loss": 2.3803, |
|
"step": 230000 |
|
}, |
|
{ |
|
"epoch": 8.564952437574316, |
|
"grad_norm": 6.171419620513916, |
|
"learning_rate": 2.858761890606421e-05, |
|
"loss": 2.3867, |
|
"step": 230500 |
|
}, |
|
{ |
|
"epoch": 8.583531510107015, |
|
"grad_norm": 7.179515361785889, |
|
"learning_rate": 2.8541171224732462e-05, |
|
"loss": 2.3819, |
|
"step": 231000 |
|
}, |
|
{ |
|
"epoch": 8.602110582639714, |
|
"grad_norm": 7.424422264099121, |
|
"learning_rate": 2.8494723543400716e-05, |
|
"loss": 2.3892, |
|
"step": 231500 |
|
}, |
|
{ |
|
"epoch": 8.620689655172415, |
|
"grad_norm": 6.56906795501709, |
|
"learning_rate": 2.844827586206897e-05, |
|
"loss": 2.3875, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 8.639268727705113, |
|
"grad_norm": 5.986749649047852, |
|
"learning_rate": 2.8401828180737217e-05, |
|
"loss": 2.3881, |
|
"step": 232500 |
|
}, |
|
{ |
|
"epoch": 8.657847800237812, |
|
"grad_norm": 7.885437965393066, |
|
"learning_rate": 2.835538049940547e-05, |
|
"loss": 2.3898, |
|
"step": 233000 |
|
}, |
|
{ |
|
"epoch": 8.676426872770511, |
|
"grad_norm": 8.217313766479492, |
|
"learning_rate": 2.8308932818073724e-05, |
|
"loss": 2.3853, |
|
"step": 233500 |
|
}, |
|
{ |
|
"epoch": 8.69500594530321, |
|
"grad_norm": 7.467879295349121, |
|
"learning_rate": 2.826248513674197e-05, |
|
"loss": 2.3894, |
|
"step": 234000 |
|
}, |
|
{ |
|
"epoch": 8.713585017835909, |
|
"grad_norm": 6.856407642364502, |
|
"learning_rate": 2.8216037455410225e-05, |
|
"loss": 2.3884, |
|
"step": 234500 |
|
}, |
|
{ |
|
"epoch": 8.73216409036861, |
|
"grad_norm": 7.717813014984131, |
|
"learning_rate": 2.816958977407848e-05, |
|
"loss": 2.3735, |
|
"step": 235000 |
|
}, |
|
{ |
|
"epoch": 8.750743162901308, |
|
"grad_norm": 6.215982913970947, |
|
"learning_rate": 2.812314209274673e-05, |
|
"loss": 2.3704, |
|
"step": 235500 |
|
}, |
|
{ |
|
"epoch": 8.769322235434007, |
|
"grad_norm": 5.821375370025635, |
|
"learning_rate": 2.8076694411414984e-05, |
|
"loss": 2.3821, |
|
"step": 236000 |
|
}, |
|
{ |
|
"epoch": 8.787901307966706, |
|
"grad_norm": 5.752195358276367, |
|
"learning_rate": 2.8030246730083238e-05, |
|
"loss": 2.362, |
|
"step": 236500 |
|
}, |
|
{ |
|
"epoch": 8.806480380499405, |
|
"grad_norm": 7.1153693199157715, |
|
"learning_rate": 2.7983799048751485e-05, |
|
"loss": 2.3804, |
|
"step": 237000 |
|
}, |
|
{ |
|
"epoch": 8.825059453032104, |
|
"grad_norm": 7.165075302124023, |
|
"learning_rate": 2.793735136741974e-05, |
|
"loss": 2.3749, |
|
"step": 237500 |
|
}, |
|
{ |
|
"epoch": 8.843638525564804, |
|
"grad_norm": 7.609332084655762, |
|
"learning_rate": 2.7890903686087992e-05, |
|
"loss": 2.3783, |
|
"step": 238000 |
|
}, |
|
{ |
|
"epoch": 8.862217598097503, |
|
"grad_norm": 7.269701957702637, |
|
"learning_rate": 2.7844456004756243e-05, |
|
"loss": 2.3612, |
|
"step": 238500 |
|
}, |
|
{ |
|
"epoch": 8.880796670630202, |
|
"grad_norm": 6.229999542236328, |
|
"learning_rate": 2.7798008323424497e-05, |
|
"loss": 2.3674, |
|
"step": 239000 |
|
}, |
|
{ |
|
"epoch": 8.899375743162901, |
|
"grad_norm": 6.712778568267822, |
|
"learning_rate": 2.775156064209275e-05, |
|
"loss": 2.4045, |
|
"step": 239500 |
|
}, |
|
{ |
|
"epoch": 8.9179548156956, |
|
"grad_norm": 6.752030372619629, |
|
"learning_rate": 2.7705112960760998e-05, |
|
"loss": 2.3665, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 8.9365338882283, |
|
"grad_norm": 7.107761383056641, |
|
"learning_rate": 2.765866527942925e-05, |
|
"loss": 2.3757, |
|
"step": 240500 |
|
}, |
|
{ |
|
"epoch": 8.955112960761, |
|
"grad_norm": 6.4916300773620605, |
|
"learning_rate": 2.7612217598097506e-05, |
|
"loss": 2.364, |
|
"step": 241000 |
|
}, |
|
{ |
|
"epoch": 8.973692033293698, |
|
"grad_norm": 6.902660846710205, |
|
"learning_rate": 2.7565769916765756e-05, |
|
"loss": 2.3945, |
|
"step": 241500 |
|
}, |
|
{ |
|
"epoch": 8.992271105826397, |
|
"grad_norm": 6.676261901855469, |
|
"learning_rate": 2.751932223543401e-05, |
|
"loss": 2.368, |
|
"step": 242000 |
|
}, |
|
{ |
|
"epoch": 9.010850178359096, |
|
"grad_norm": 7.0637125968933105, |
|
"learning_rate": 2.7472874554102264e-05, |
|
"loss": 2.3526, |
|
"step": 242500 |
|
}, |
|
{ |
|
"epoch": 9.029429250891795, |
|
"grad_norm": 6.886041164398193, |
|
"learning_rate": 2.742642687277051e-05, |
|
"loss": 2.3409, |
|
"step": 243000 |
|
}, |
|
{ |
|
"epoch": 9.048008323424495, |
|
"grad_norm": 6.17530632019043, |
|
"learning_rate": 2.7379979191438765e-05, |
|
"loss": 2.3368, |
|
"step": 243500 |
|
}, |
|
{ |
|
"epoch": 9.066587395957194, |
|
"grad_norm": 6.835616588592529, |
|
"learning_rate": 2.733353151010702e-05, |
|
"loss": 2.3521, |
|
"step": 244000 |
|
}, |
|
{ |
|
"epoch": 9.085166468489893, |
|
"grad_norm": 7.837756156921387, |
|
"learning_rate": 2.7287083828775266e-05, |
|
"loss": 2.3517, |
|
"step": 244500 |
|
}, |
|
{ |
|
"epoch": 9.103745541022592, |
|
"grad_norm": 7.3295793533325195, |
|
"learning_rate": 2.724063614744352e-05, |
|
"loss": 2.3351, |
|
"step": 245000 |
|
}, |
|
{ |
|
"epoch": 9.122324613555291, |
|
"grad_norm": 6.278160095214844, |
|
"learning_rate": 2.7194188466111774e-05, |
|
"loss": 2.3544, |
|
"step": 245500 |
|
}, |
|
{ |
|
"epoch": 9.14090368608799, |
|
"grad_norm": 6.8166823387146, |
|
"learning_rate": 2.7147740784780024e-05, |
|
"loss": 2.3562, |
|
"step": 246000 |
|
}, |
|
{ |
|
"epoch": 9.15948275862069, |
|
"grad_norm": 6.9190473556518555, |
|
"learning_rate": 2.7101293103448278e-05, |
|
"loss": 2.3707, |
|
"step": 246500 |
|
}, |
|
{ |
|
"epoch": 9.17806183115339, |
|
"grad_norm": 8.471137046813965, |
|
"learning_rate": 2.7054845422116532e-05, |
|
"loss": 2.3348, |
|
"step": 247000 |
|
}, |
|
{ |
|
"epoch": 9.196640903686088, |
|
"grad_norm": 7.1549553871154785, |
|
"learning_rate": 2.700839774078478e-05, |
|
"loss": 2.3482, |
|
"step": 247500 |
|
}, |
|
{ |
|
"epoch": 9.215219976218787, |
|
"grad_norm": 7.972681999206543, |
|
"learning_rate": 2.6961950059453033e-05, |
|
"loss": 2.3327, |
|
"step": 248000 |
|
}, |
|
{ |
|
"epoch": 9.233799048751486, |
|
"grad_norm": 6.290485858917236, |
|
"learning_rate": 2.6915502378121287e-05, |
|
"loss": 2.3344, |
|
"step": 248500 |
|
}, |
|
{ |
|
"epoch": 9.252378121284185, |
|
"grad_norm": 7.835150718688965, |
|
"learning_rate": 2.6869054696789537e-05, |
|
"loss": 2.3523, |
|
"step": 249000 |
|
}, |
|
{ |
|
"epoch": 9.270957193816885, |
|
"grad_norm": 6.171538829803467, |
|
"learning_rate": 2.682260701545779e-05, |
|
"loss": 2.3439, |
|
"step": 249500 |
|
}, |
|
{ |
|
"epoch": 9.289536266349584, |
|
"grad_norm": 6.854957580566406, |
|
"learning_rate": 2.6776159334126045e-05, |
|
"loss": 2.348, |
|
"step": 250000 |
|
}, |
|
{ |
|
"epoch": 9.308115338882283, |
|
"grad_norm": 6.949794769287109, |
|
"learning_rate": 2.6729711652794292e-05, |
|
"loss": 2.3416, |
|
"step": 250500 |
|
}, |
|
{ |
|
"epoch": 9.326694411414982, |
|
"grad_norm": 7.924169540405273, |
|
"learning_rate": 2.6683263971462546e-05, |
|
"loss": 2.341, |
|
"step": 251000 |
|
}, |
|
{ |
|
"epoch": 9.34527348394768, |
|
"grad_norm": 6.802456378936768, |
|
"learning_rate": 2.66368162901308e-05, |
|
"loss": 2.3373, |
|
"step": 251500 |
|
}, |
|
{ |
|
"epoch": 9.363852556480381, |
|
"grad_norm": 5.974133491516113, |
|
"learning_rate": 2.659036860879905e-05, |
|
"loss": 2.3447, |
|
"step": 252000 |
|
}, |
|
{ |
|
"epoch": 9.38243162901308, |
|
"grad_norm": 7.3315277099609375, |
|
"learning_rate": 2.6543920927467304e-05, |
|
"loss": 2.345, |
|
"step": 252500 |
|
}, |
|
{ |
|
"epoch": 9.40101070154578, |
|
"grad_norm": 7.01455020904541, |
|
"learning_rate": 2.6497473246135558e-05, |
|
"loss": 2.3354, |
|
"step": 253000 |
|
}, |
|
{ |
|
"epoch": 9.419589774078478, |
|
"grad_norm": 6.553669452667236, |
|
"learning_rate": 2.6451025564803805e-05, |
|
"loss": 2.3505, |
|
"step": 253500 |
|
}, |
|
{ |
|
"epoch": 9.438168846611177, |
|
"grad_norm": 7.384204387664795, |
|
"learning_rate": 2.640457788347206e-05, |
|
"loss": 2.3406, |
|
"step": 254000 |
|
}, |
|
{ |
|
"epoch": 9.456747919143876, |
|
"grad_norm": 7.899343490600586, |
|
"learning_rate": 2.6358130202140313e-05, |
|
"loss": 2.3534, |
|
"step": 254500 |
|
}, |
|
{ |
|
"epoch": 9.475326991676576, |
|
"grad_norm": 6.718962669372559, |
|
"learning_rate": 2.631168252080856e-05, |
|
"loss": 2.3447, |
|
"step": 255000 |
|
}, |
|
{ |
|
"epoch": 9.493906064209275, |
|
"grad_norm": 7.7100629806518555, |
|
"learning_rate": 2.6265234839476814e-05, |
|
"loss": 2.3378, |
|
"step": 255500 |
|
}, |
|
{ |
|
"epoch": 9.512485136741974, |
|
"grad_norm": 6.307003974914551, |
|
"learning_rate": 2.6218787158145064e-05, |
|
"loss": 2.3673, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 9.531064209274673, |
|
"grad_norm": 6.968733787536621, |
|
"learning_rate": 2.6172339476813318e-05, |
|
"loss": 2.3502, |
|
"step": 256500 |
|
}, |
|
{ |
|
"epoch": 9.549643281807372, |
|
"grad_norm": 7.223754405975342, |
|
"learning_rate": 2.6125891795481572e-05, |
|
"loss": 2.3397, |
|
"step": 257000 |
|
}, |
|
{ |
|
"epoch": 9.56822235434007, |
|
"grad_norm": 7.984851360321045, |
|
"learning_rate": 2.607944411414982e-05, |
|
"loss": 2.3394, |
|
"step": 257500 |
|
}, |
|
{ |
|
"epoch": 9.586801426872771, |
|
"grad_norm": 6.745290279388428, |
|
"learning_rate": 2.6032996432818073e-05, |
|
"loss": 2.357, |
|
"step": 258000 |
|
}, |
|
{ |
|
"epoch": 9.60538049940547, |
|
"grad_norm": 6.241764068603516, |
|
"learning_rate": 2.5986548751486327e-05, |
|
"loss": 2.3294, |
|
"step": 258500 |
|
}, |
|
{ |
|
"epoch": 9.623959571938169, |
|
"grad_norm": 6.849953651428223, |
|
"learning_rate": 2.5940101070154577e-05, |
|
"loss": 2.319, |
|
"step": 259000 |
|
}, |
|
{ |
|
"epoch": 9.642538644470868, |
|
"grad_norm": 6.786033630371094, |
|
"learning_rate": 2.589365338882283e-05, |
|
"loss": 2.3381, |
|
"step": 259500 |
|
}, |
|
{ |
|
"epoch": 9.661117717003567, |
|
"grad_norm": 6.5294952392578125, |
|
"learning_rate": 2.5847205707491085e-05, |
|
"loss": 2.3292, |
|
"step": 260000 |
|
}, |
|
{ |
|
"epoch": 9.679696789536266, |
|
"grad_norm": 6.852995872497559, |
|
"learning_rate": 2.5800758026159332e-05, |
|
"loss": 2.3513, |
|
"step": 260500 |
|
}, |
|
{ |
|
"epoch": 9.698275862068966, |
|
"grad_norm": 7.107331275939941, |
|
"learning_rate": 2.5754310344827586e-05, |
|
"loss": 2.342, |
|
"step": 261000 |
|
}, |
|
{ |
|
"epoch": 9.716854934601665, |
|
"grad_norm": 6.497838020324707, |
|
"learning_rate": 2.570786266349584e-05, |
|
"loss": 2.3518, |
|
"step": 261500 |
|
}, |
|
{ |
|
"epoch": 9.735434007134364, |
|
"grad_norm": 7.103449821472168, |
|
"learning_rate": 2.566141498216409e-05, |
|
"loss": 2.3243, |
|
"step": 262000 |
|
}, |
|
{ |
|
"epoch": 9.754013079667063, |
|
"grad_norm": 6.207728862762451, |
|
"learning_rate": 2.5614967300832344e-05, |
|
"loss": 2.3295, |
|
"step": 262500 |
|
}, |
|
{ |
|
"epoch": 9.772592152199762, |
|
"grad_norm": 6.938514232635498, |
|
"learning_rate": 2.55685196195006e-05, |
|
"loss": 2.3378, |
|
"step": 263000 |
|
}, |
|
{ |
|
"epoch": 9.791171224732462, |
|
"grad_norm": 8.32728385925293, |
|
"learning_rate": 2.5522071938168845e-05, |
|
"loss": 2.3397, |
|
"step": 263500 |
|
}, |
|
{ |
|
"epoch": 9.809750297265161, |
|
"grad_norm": 7.170902729034424, |
|
"learning_rate": 2.54756242568371e-05, |
|
"loss": 2.3152, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 9.82832936979786, |
|
"grad_norm": 6.303475856781006, |
|
"learning_rate": 2.5429176575505353e-05, |
|
"loss": 2.335, |
|
"step": 264500 |
|
}, |
|
{ |
|
"epoch": 9.846908442330559, |
|
"grad_norm": 6.429758548736572, |
|
"learning_rate": 2.53827288941736e-05, |
|
"loss": 2.3193, |
|
"step": 265000 |
|
}, |
|
{ |
|
"epoch": 9.865487514863258, |
|
"grad_norm": 7.365509986877441, |
|
"learning_rate": 2.5336281212841854e-05, |
|
"loss": 2.3291, |
|
"step": 265500 |
|
}, |
|
{ |
|
"epoch": 9.884066587395957, |
|
"grad_norm": 6.403247356414795, |
|
"learning_rate": 2.528983353151011e-05, |
|
"loss": 2.3289, |
|
"step": 266000 |
|
}, |
|
{ |
|
"epoch": 9.902645659928655, |
|
"grad_norm": 6.402617454528809, |
|
"learning_rate": 2.524338585017836e-05, |
|
"loss": 2.3383, |
|
"step": 266500 |
|
}, |
|
{ |
|
"epoch": 9.921224732461356, |
|
"grad_norm": 8.039521217346191, |
|
"learning_rate": 2.5196938168846612e-05, |
|
"loss": 2.3535, |
|
"step": 267000 |
|
}, |
|
{ |
|
"epoch": 9.939803804994055, |
|
"grad_norm": 6.797732830047607, |
|
"learning_rate": 2.5150490487514866e-05, |
|
"loss": 2.3102, |
|
"step": 267500 |
|
}, |
|
{ |
|
"epoch": 9.958382877526754, |
|
"grad_norm": 6.878042221069336, |
|
"learning_rate": 2.5104042806183113e-05, |
|
"loss": 2.3252, |
|
"step": 268000 |
|
}, |
|
{ |
|
"epoch": 9.976961950059453, |
|
"grad_norm": 7.837581634521484, |
|
"learning_rate": 2.5057595124851367e-05, |
|
"loss": 2.3259, |
|
"step": 268500 |
|
}, |
|
{ |
|
"epoch": 9.995541022592151, |
|
"grad_norm": 7.878035545349121, |
|
"learning_rate": 2.501114744351962e-05, |
|
"loss": 2.3206, |
|
"step": 269000 |
|
}, |
|
{ |
|
"epoch": 10.014120095124852, |
|
"grad_norm": 7.06614875793457, |
|
"learning_rate": 2.4964699762187875e-05, |
|
"loss": 2.3031, |
|
"step": 269500 |
|
}, |
|
{ |
|
"epoch": 10.032699167657551, |
|
"grad_norm": 6.305147647857666, |
|
"learning_rate": 2.4918252080856126e-05, |
|
"loss": 2.2958, |
|
"step": 270000 |
|
}, |
|
{ |
|
"epoch": 10.05127824019025, |
|
"grad_norm": 7.321694374084473, |
|
"learning_rate": 2.4871804399524376e-05, |
|
"loss": 2.3102, |
|
"step": 270500 |
|
}, |
|
{ |
|
"epoch": 10.069857312722949, |
|
"grad_norm": 6.2910356521606445, |
|
"learning_rate": 2.482535671819263e-05, |
|
"loss": 2.3087, |
|
"step": 271000 |
|
}, |
|
{ |
|
"epoch": 10.088436385255648, |
|
"grad_norm": 6.352067470550537, |
|
"learning_rate": 2.477890903686088e-05, |
|
"loss": 2.2997, |
|
"step": 271500 |
|
}, |
|
{ |
|
"epoch": 10.107015457788346, |
|
"grad_norm": 7.583943843841553, |
|
"learning_rate": 2.473246135552913e-05, |
|
"loss": 2.2976, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 10.125594530321047, |
|
"grad_norm": 6.128369331359863, |
|
"learning_rate": 2.4686013674197385e-05, |
|
"loss": 2.3184, |
|
"step": 272500 |
|
}, |
|
{ |
|
"epoch": 10.144173602853746, |
|
"grad_norm": 7.117658615112305, |
|
"learning_rate": 2.463956599286564e-05, |
|
"loss": 2.297, |
|
"step": 273000 |
|
}, |
|
{ |
|
"epoch": 10.162752675386445, |
|
"grad_norm": 6.37664270401001, |
|
"learning_rate": 2.459311831153389e-05, |
|
"loss": 2.3054, |
|
"step": 273500 |
|
}, |
|
{ |
|
"epoch": 10.181331747919144, |
|
"grad_norm": 8.254295349121094, |
|
"learning_rate": 2.4546670630202143e-05, |
|
"loss": 2.2856, |
|
"step": 274000 |
|
}, |
|
{ |
|
"epoch": 10.199910820451842, |
|
"grad_norm": 7.399996757507324, |
|
"learning_rate": 2.4500222948870394e-05, |
|
"loss": 2.3191, |
|
"step": 274500 |
|
}, |
|
{ |
|
"epoch": 10.218489892984541, |
|
"grad_norm": 7.4784464836120605, |
|
"learning_rate": 2.4453775267538644e-05, |
|
"loss": 2.2994, |
|
"step": 275000 |
|
}, |
|
{ |
|
"epoch": 10.237068965517242, |
|
"grad_norm": 7.332183837890625, |
|
"learning_rate": 2.4407327586206898e-05, |
|
"loss": 2.3022, |
|
"step": 275500 |
|
}, |
|
{ |
|
"epoch": 10.25564803804994, |
|
"grad_norm": 6.316469192504883, |
|
"learning_rate": 2.4360879904875152e-05, |
|
"loss": 2.306, |
|
"step": 276000 |
|
}, |
|
{ |
|
"epoch": 10.27422711058264, |
|
"grad_norm": 7.272724628448486, |
|
"learning_rate": 2.4314432223543402e-05, |
|
"loss": 2.293, |
|
"step": 276500 |
|
}, |
|
{ |
|
"epoch": 10.292806183115339, |
|
"grad_norm": 7.283202171325684, |
|
"learning_rate": 2.4267984542211656e-05, |
|
"loss": 2.3086, |
|
"step": 277000 |
|
}, |
|
{ |
|
"epoch": 10.311385255648037, |
|
"grad_norm": 6.357330799102783, |
|
"learning_rate": 2.4221536860879907e-05, |
|
"loss": 2.2958, |
|
"step": 277500 |
|
}, |
|
{ |
|
"epoch": 10.329964328180738, |
|
"grad_norm": 6.361136436462402, |
|
"learning_rate": 2.4175089179548157e-05, |
|
"loss": 2.2856, |
|
"step": 278000 |
|
}, |
|
{ |
|
"epoch": 10.348543400713437, |
|
"grad_norm": 7.32297420501709, |
|
"learning_rate": 2.4128641498216408e-05, |
|
"loss": 2.2904, |
|
"step": 278500 |
|
}, |
|
{ |
|
"epoch": 10.367122473246136, |
|
"grad_norm": 7.6246161460876465, |
|
"learning_rate": 2.408219381688466e-05, |
|
"loss": 2.2872, |
|
"step": 279000 |
|
}, |
|
{ |
|
"epoch": 10.385701545778835, |
|
"grad_norm": 6.27332067489624, |
|
"learning_rate": 2.4035746135552915e-05, |
|
"loss": 2.2829, |
|
"step": 279500 |
|
}, |
|
{ |
|
"epoch": 10.404280618311534, |
|
"grad_norm": 7.062289714813232, |
|
"learning_rate": 2.3989298454221166e-05, |
|
"loss": 2.2938, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 10.422859690844232, |
|
"grad_norm": 8.132457733154297, |
|
"learning_rate": 2.394285077288942e-05, |
|
"loss": 2.2994, |
|
"step": 280500 |
|
}, |
|
{ |
|
"epoch": 10.441438763376933, |
|
"grad_norm": 6.456370830535889, |
|
"learning_rate": 2.389640309155767e-05, |
|
"loss": 2.2845, |
|
"step": 281000 |
|
}, |
|
{ |
|
"epoch": 10.460017835909632, |
|
"grad_norm": 8.033242225646973, |
|
"learning_rate": 2.384995541022592e-05, |
|
"loss": 2.2907, |
|
"step": 281500 |
|
}, |
|
{ |
|
"epoch": 10.47859690844233, |
|
"grad_norm": 7.318391799926758, |
|
"learning_rate": 2.3803507728894175e-05, |
|
"loss": 2.288, |
|
"step": 282000 |
|
}, |
|
{ |
|
"epoch": 10.49717598097503, |
|
"grad_norm": 6.92618465423584, |
|
"learning_rate": 2.3757060047562425e-05, |
|
"loss": 2.2875, |
|
"step": 282500 |
|
}, |
|
{ |
|
"epoch": 10.515755053507728, |
|
"grad_norm": 6.721688747406006, |
|
"learning_rate": 2.371061236623068e-05, |
|
"loss": 2.295, |
|
"step": 283000 |
|
}, |
|
{ |
|
"epoch": 10.534334126040427, |
|
"grad_norm": 7.079250335693359, |
|
"learning_rate": 2.3664164684898933e-05, |
|
"loss": 2.2806, |
|
"step": 283500 |
|
}, |
|
{ |
|
"epoch": 10.552913198573128, |
|
"grad_norm": 7.229697227478027, |
|
"learning_rate": 2.3617717003567183e-05, |
|
"loss": 2.2828, |
|
"step": 284000 |
|
}, |
|
{ |
|
"epoch": 10.571492271105827, |
|
"grad_norm": 6.85770845413208, |
|
"learning_rate": 2.3571269322235434e-05, |
|
"loss": 2.3038, |
|
"step": 284500 |
|
}, |
|
{ |
|
"epoch": 10.590071343638526, |
|
"grad_norm": 7.07368803024292, |
|
"learning_rate": 2.3524821640903688e-05, |
|
"loss": 2.2918, |
|
"step": 285000 |
|
}, |
|
{ |
|
"epoch": 10.608650416171225, |
|
"grad_norm": 7.446401119232178, |
|
"learning_rate": 2.3478373959571938e-05, |
|
"loss": 2.3097, |
|
"step": 285500 |
|
}, |
|
{ |
|
"epoch": 10.627229488703923, |
|
"grad_norm": 7.388403415679932, |
|
"learning_rate": 2.3431926278240192e-05, |
|
"loss": 2.2753, |
|
"step": 286000 |
|
}, |
|
{ |
|
"epoch": 10.645808561236624, |
|
"grad_norm": 7.510107517242432, |
|
"learning_rate": 2.3385478596908446e-05, |
|
"loss": 2.2592, |
|
"step": 286500 |
|
}, |
|
{ |
|
"epoch": 10.664387633769323, |
|
"grad_norm": 6.856348514556885, |
|
"learning_rate": 2.3339030915576697e-05, |
|
"loss": 2.3018, |
|
"step": 287000 |
|
}, |
|
{ |
|
"epoch": 10.682966706302022, |
|
"grad_norm": 5.952792644500732, |
|
"learning_rate": 2.3292583234244947e-05, |
|
"loss": 2.293, |
|
"step": 287500 |
|
}, |
|
{ |
|
"epoch": 10.70154577883472, |
|
"grad_norm": 6.156429290771484, |
|
"learning_rate": 2.32461355529132e-05, |
|
"loss": 2.2794, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 10.72012485136742, |
|
"grad_norm": 7.464205741882324, |
|
"learning_rate": 2.319968787158145e-05, |
|
"loss": 2.2819, |
|
"step": 288500 |
|
}, |
|
{ |
|
"epoch": 10.738703923900118, |
|
"grad_norm": 6.248416423797607, |
|
"learning_rate": 2.3153240190249702e-05, |
|
"loss": 2.2841, |
|
"step": 289000 |
|
}, |
|
{ |
|
"epoch": 10.757282996432817, |
|
"grad_norm": 6.5093183517456055, |
|
"learning_rate": 2.3106792508917956e-05, |
|
"loss": 2.2974, |
|
"step": 289500 |
|
}, |
|
{ |
|
"epoch": 10.775862068965518, |
|
"grad_norm": 6.669436454772949, |
|
"learning_rate": 2.306034482758621e-05, |
|
"loss": 2.2823, |
|
"step": 290000 |
|
}, |
|
{ |
|
"epoch": 10.794441141498217, |
|
"grad_norm": 6.547306537628174, |
|
"learning_rate": 2.301389714625446e-05, |
|
"loss": 2.2783, |
|
"step": 290500 |
|
}, |
|
{ |
|
"epoch": 10.813020214030916, |
|
"grad_norm": 7.420673847198486, |
|
"learning_rate": 2.2967449464922714e-05, |
|
"loss": 2.2803, |
|
"step": 291000 |
|
}, |
|
{ |
|
"epoch": 10.831599286563614, |
|
"grad_norm": 7.08470344543457, |
|
"learning_rate": 2.2921001783590965e-05, |
|
"loss": 2.2897, |
|
"step": 291500 |
|
}, |
|
{ |
|
"epoch": 10.850178359096313, |
|
"grad_norm": 7.092275142669678, |
|
"learning_rate": 2.2874554102259215e-05, |
|
"loss": 2.2842, |
|
"step": 292000 |
|
}, |
|
{ |
|
"epoch": 10.868757431629014, |
|
"grad_norm": 6.814739227294922, |
|
"learning_rate": 2.282810642092747e-05, |
|
"loss": 2.2637, |
|
"step": 292500 |
|
}, |
|
{ |
|
"epoch": 10.887336504161713, |
|
"grad_norm": 6.778537750244141, |
|
"learning_rate": 2.278165873959572e-05, |
|
"loss": 2.2802, |
|
"step": 293000 |
|
}, |
|
{ |
|
"epoch": 10.905915576694412, |
|
"grad_norm": 6.529074668884277, |
|
"learning_rate": 2.2735211058263973e-05, |
|
"loss": 2.282, |
|
"step": 293500 |
|
}, |
|
{ |
|
"epoch": 10.92449464922711, |
|
"grad_norm": 7.486764430999756, |
|
"learning_rate": 2.2688763376932224e-05, |
|
"loss": 2.2964, |
|
"step": 294000 |
|
}, |
|
{ |
|
"epoch": 10.94307372175981, |
|
"grad_norm": 9.576150894165039, |
|
"learning_rate": 2.2642315695600478e-05, |
|
"loss": 2.2853, |
|
"step": 294500 |
|
}, |
|
{ |
|
"epoch": 10.961652794292508, |
|
"grad_norm": 7.3996429443359375, |
|
"learning_rate": 2.2595868014268728e-05, |
|
"loss": 2.278, |
|
"step": 295000 |
|
}, |
|
{ |
|
"epoch": 10.980231866825209, |
|
"grad_norm": 6.478265762329102, |
|
"learning_rate": 2.254942033293698e-05, |
|
"loss": 2.2857, |
|
"step": 295500 |
|
}, |
|
{ |
|
"epoch": 10.998810939357908, |
|
"grad_norm": 7.264919757843018, |
|
"learning_rate": 2.2502972651605233e-05, |
|
"loss": 2.2638, |
|
"step": 296000 |
|
}, |
|
{ |
|
"epoch": 11.017390011890607, |
|
"grad_norm": 6.449435234069824, |
|
"learning_rate": 2.2456524970273486e-05, |
|
"loss": 2.2538, |
|
"step": 296500 |
|
}, |
|
{ |
|
"epoch": 11.035969084423305, |
|
"grad_norm": 8.838685035705566, |
|
"learning_rate": 2.2410077288941737e-05, |
|
"loss": 2.2658, |
|
"step": 297000 |
|
}, |
|
{ |
|
"epoch": 11.054548156956004, |
|
"grad_norm": 7.12150764465332, |
|
"learning_rate": 2.236362960760999e-05, |
|
"loss": 2.2582, |
|
"step": 297500 |
|
}, |
|
{ |
|
"epoch": 11.073127229488703, |
|
"grad_norm": 7.337321758270264, |
|
"learning_rate": 2.231718192627824e-05, |
|
"loss": 2.2531, |
|
"step": 298000 |
|
}, |
|
{ |
|
"epoch": 11.091706302021404, |
|
"grad_norm": 7.290600776672363, |
|
"learning_rate": 2.2270734244946492e-05, |
|
"loss": 2.2607, |
|
"step": 298500 |
|
}, |
|
{ |
|
"epoch": 11.110285374554103, |
|
"grad_norm": 6.834112644195557, |
|
"learning_rate": 2.2224286563614746e-05, |
|
"loss": 2.2593, |
|
"step": 299000 |
|
}, |
|
{ |
|
"epoch": 11.128864447086801, |
|
"grad_norm": 7.174058437347412, |
|
"learning_rate": 2.2177838882282996e-05, |
|
"loss": 2.2584, |
|
"step": 299500 |
|
}, |
|
{ |
|
"epoch": 11.1474435196195, |
|
"grad_norm": 6.08710241317749, |
|
"learning_rate": 2.213139120095125e-05, |
|
"loss": 2.2572, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 11.1660225921522, |
|
"grad_norm": 7.66245174407959, |
|
"learning_rate": 2.2084943519619504e-05, |
|
"loss": 2.2597, |
|
"step": 300500 |
|
}, |
|
{ |
|
"epoch": 11.1846016646849, |
|
"grad_norm": 6.607715606689453, |
|
"learning_rate": 2.2038495838287754e-05, |
|
"loss": 2.2383, |
|
"step": 301000 |
|
}, |
|
{ |
|
"epoch": 11.203180737217599, |
|
"grad_norm": 6.562816143035889, |
|
"learning_rate": 2.1992048156956005e-05, |
|
"loss": 2.2497, |
|
"step": 301500 |
|
}, |
|
{ |
|
"epoch": 11.221759809750298, |
|
"grad_norm": 6.655299186706543, |
|
"learning_rate": 2.194560047562426e-05, |
|
"loss": 2.2628, |
|
"step": 302000 |
|
}, |
|
{ |
|
"epoch": 11.240338882282996, |
|
"grad_norm": 6.629017353057861, |
|
"learning_rate": 2.189915279429251e-05, |
|
"loss": 2.2568, |
|
"step": 302500 |
|
}, |
|
{ |
|
"epoch": 11.258917954815695, |
|
"grad_norm": 7.567939281463623, |
|
"learning_rate": 2.185270511296076e-05, |
|
"loss": 2.2732, |
|
"step": 303000 |
|
}, |
|
{ |
|
"epoch": 11.277497027348394, |
|
"grad_norm": 8.384344100952148, |
|
"learning_rate": 2.1806257431629014e-05, |
|
"loss": 2.2702, |
|
"step": 303500 |
|
}, |
|
{ |
|
"epoch": 11.296076099881095, |
|
"grad_norm": 7.6042914390563965, |
|
"learning_rate": 2.1759809750297268e-05, |
|
"loss": 2.2671, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 11.314655172413794, |
|
"grad_norm": 6.45172643661499, |
|
"learning_rate": 2.1713362068965518e-05, |
|
"loss": 2.2531, |
|
"step": 304500 |
|
}, |
|
{ |
|
"epoch": 11.333234244946492, |
|
"grad_norm": 6.863234519958496, |
|
"learning_rate": 2.1666914387633772e-05, |
|
"loss": 2.2627, |
|
"step": 305000 |
|
}, |
|
{ |
|
"epoch": 11.351813317479191, |
|
"grad_norm": 8.442804336547852, |
|
"learning_rate": 2.1620466706302022e-05, |
|
"loss": 2.2551, |
|
"step": 305500 |
|
}, |
|
{ |
|
"epoch": 11.37039239001189, |
|
"grad_norm": 8.2174072265625, |
|
"learning_rate": 2.1574019024970273e-05, |
|
"loss": 2.2559, |
|
"step": 306000 |
|
}, |
|
{ |
|
"epoch": 11.388971462544589, |
|
"grad_norm": 7.479830265045166, |
|
"learning_rate": 2.1527571343638527e-05, |
|
"loss": 2.257, |
|
"step": 306500 |
|
}, |
|
{ |
|
"epoch": 11.40755053507729, |
|
"grad_norm": 6.4119744300842285, |
|
"learning_rate": 2.148112366230678e-05, |
|
"loss": 2.2577, |
|
"step": 307000 |
|
}, |
|
{ |
|
"epoch": 11.426129607609989, |
|
"grad_norm": 7.141465187072754, |
|
"learning_rate": 2.143467598097503e-05, |
|
"loss": 2.2583, |
|
"step": 307500 |
|
}, |
|
{ |
|
"epoch": 11.444708680142687, |
|
"grad_norm": 7.255865097045898, |
|
"learning_rate": 2.138822829964328e-05, |
|
"loss": 2.2549, |
|
"step": 308000 |
|
}, |
|
{ |
|
"epoch": 11.463287752675386, |
|
"grad_norm": 6.533185958862305, |
|
"learning_rate": 2.1341780618311535e-05, |
|
"loss": 2.2563, |
|
"step": 308500 |
|
}, |
|
{ |
|
"epoch": 11.481866825208085, |
|
"grad_norm": 5.948304176330566, |
|
"learning_rate": 2.1295332936979786e-05, |
|
"loss": 2.2629, |
|
"step": 309000 |
|
}, |
|
{ |
|
"epoch": 11.500445897740784, |
|
"grad_norm": 7.485329627990723, |
|
"learning_rate": 2.1248885255648036e-05, |
|
"loss": 2.243, |
|
"step": 309500 |
|
}, |
|
{ |
|
"epoch": 11.519024970273485, |
|
"grad_norm": 7.400222301483154, |
|
"learning_rate": 2.120243757431629e-05, |
|
"loss": 2.2436, |
|
"step": 310000 |
|
}, |
|
{ |
|
"epoch": 11.537604042806183, |
|
"grad_norm": 7.361048221588135, |
|
"learning_rate": 2.1155989892984544e-05, |
|
"loss": 2.2597, |
|
"step": 310500 |
|
}, |
|
{ |
|
"epoch": 11.556183115338882, |
|
"grad_norm": 7.483823776245117, |
|
"learning_rate": 2.1109542211652795e-05, |
|
"loss": 2.2643, |
|
"step": 311000 |
|
}, |
|
{ |
|
"epoch": 11.574762187871581, |
|
"grad_norm": 7.027825832366943, |
|
"learning_rate": 2.106309453032105e-05, |
|
"loss": 2.2455, |
|
"step": 311500 |
|
}, |
|
{ |
|
"epoch": 11.59334126040428, |
|
"grad_norm": 6.856015205383301, |
|
"learning_rate": 2.10166468489893e-05, |
|
"loss": 2.2351, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 11.611920332936979, |
|
"grad_norm": 7.20182991027832, |
|
"learning_rate": 2.097019916765755e-05, |
|
"loss": 2.2472, |
|
"step": 312500 |
|
}, |
|
{ |
|
"epoch": 11.63049940546968, |
|
"grad_norm": 6.145348072052002, |
|
"learning_rate": 2.0923751486325803e-05, |
|
"loss": 2.2493, |
|
"step": 313000 |
|
}, |
|
{ |
|
"epoch": 11.649078478002378, |
|
"grad_norm": 7.6849517822265625, |
|
"learning_rate": 2.0877303804994054e-05, |
|
"loss": 2.2468, |
|
"step": 313500 |
|
}, |
|
{ |
|
"epoch": 11.667657550535077, |
|
"grad_norm": 7.373369216918945, |
|
"learning_rate": 2.0830856123662308e-05, |
|
"loss": 2.252, |
|
"step": 314000 |
|
}, |
|
{ |
|
"epoch": 11.686236623067776, |
|
"grad_norm": 7.262668132781982, |
|
"learning_rate": 2.0784408442330562e-05, |
|
"loss": 2.2411, |
|
"step": 314500 |
|
}, |
|
{ |
|
"epoch": 11.704815695600475, |
|
"grad_norm": 6.475069999694824, |
|
"learning_rate": 2.0737960760998812e-05, |
|
"loss": 2.245, |
|
"step": 315000 |
|
}, |
|
{ |
|
"epoch": 11.723394768133176, |
|
"grad_norm": 6.434516906738281, |
|
"learning_rate": 2.0691513079667063e-05, |
|
"loss": 2.2459, |
|
"step": 315500 |
|
}, |
|
{ |
|
"epoch": 11.741973840665874, |
|
"grad_norm": 7.697376251220703, |
|
"learning_rate": 2.0645065398335317e-05, |
|
"loss": 2.2556, |
|
"step": 316000 |
|
}, |
|
{ |
|
"epoch": 11.760552913198573, |
|
"grad_norm": 7.839350700378418, |
|
"learning_rate": 2.0598617717003567e-05, |
|
"loss": 2.2431, |
|
"step": 316500 |
|
}, |
|
{ |
|
"epoch": 11.779131985731272, |
|
"grad_norm": 7.546802997589111, |
|
"learning_rate": 2.055217003567182e-05, |
|
"loss": 2.2627, |
|
"step": 317000 |
|
}, |
|
{ |
|
"epoch": 11.797711058263971, |
|
"grad_norm": 6.828023910522461, |
|
"learning_rate": 2.0505722354340075e-05, |
|
"loss": 2.2353, |
|
"step": 317500 |
|
}, |
|
{ |
|
"epoch": 11.81629013079667, |
|
"grad_norm": 6.4239935874938965, |
|
"learning_rate": 2.0459274673008325e-05, |
|
"loss": 2.2472, |
|
"step": 318000 |
|
}, |
|
{ |
|
"epoch": 11.83486920332937, |
|
"grad_norm": 6.941580772399902, |
|
"learning_rate": 2.0412826991676576e-05, |
|
"loss": 2.2423, |
|
"step": 318500 |
|
}, |
|
{ |
|
"epoch": 11.85344827586207, |
|
"grad_norm": 7.385081768035889, |
|
"learning_rate": 2.036637931034483e-05, |
|
"loss": 2.2332, |
|
"step": 319000 |
|
}, |
|
{ |
|
"epoch": 11.872027348394768, |
|
"grad_norm": 7.3545613288879395, |
|
"learning_rate": 2.031993162901308e-05, |
|
"loss": 2.2592, |
|
"step": 319500 |
|
}, |
|
{ |
|
"epoch": 11.890606420927467, |
|
"grad_norm": 6.4375104904174805, |
|
"learning_rate": 2.027348394768133e-05, |
|
"loss": 2.2352, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 11.909185493460166, |
|
"grad_norm": 6.863650798797607, |
|
"learning_rate": 2.0227036266349585e-05, |
|
"loss": 2.2622, |
|
"step": 320500 |
|
}, |
|
{ |
|
"epoch": 11.927764565992865, |
|
"grad_norm": 6.5175275802612305, |
|
"learning_rate": 2.018058858501784e-05, |
|
"loss": 2.2421, |
|
"step": 321000 |
|
}, |
|
{ |
|
"epoch": 11.946343638525565, |
|
"grad_norm": 7.415239334106445, |
|
"learning_rate": 2.013414090368609e-05, |
|
"loss": 2.2483, |
|
"step": 321500 |
|
}, |
|
{ |
|
"epoch": 11.964922711058264, |
|
"grad_norm": 8.416884422302246, |
|
"learning_rate": 2.0087693222354343e-05, |
|
"loss": 2.245, |
|
"step": 322000 |
|
}, |
|
{ |
|
"epoch": 11.983501783590963, |
|
"grad_norm": 6.286489009857178, |
|
"learning_rate": 2.0041245541022593e-05, |
|
"loss": 2.2409, |
|
"step": 322500 |
|
}, |
|
{ |
|
"epoch": 12.002080856123662, |
|
"grad_norm": 7.4863080978393555, |
|
"learning_rate": 1.9994797859690844e-05, |
|
"loss": 2.2337, |
|
"step": 323000 |
|
}, |
|
{ |
|
"epoch": 12.020659928656361, |
|
"grad_norm": 6.175674915313721, |
|
"learning_rate": 1.9948350178359094e-05, |
|
"loss": 2.2054, |
|
"step": 323500 |
|
}, |
|
{ |
|
"epoch": 12.03923900118906, |
|
"grad_norm": 7.600936412811279, |
|
"learning_rate": 1.9901902497027348e-05, |
|
"loss": 2.2135, |
|
"step": 324000 |
|
}, |
|
{ |
|
"epoch": 12.05781807372176, |
|
"grad_norm": 7.510547637939453, |
|
"learning_rate": 1.9855454815695602e-05, |
|
"loss": 2.231, |
|
"step": 324500 |
|
}, |
|
{ |
|
"epoch": 12.07639714625446, |
|
"grad_norm": 6.505836009979248, |
|
"learning_rate": 1.9809007134363853e-05, |
|
"loss": 2.2123, |
|
"step": 325000 |
|
}, |
|
{ |
|
"epoch": 12.094976218787158, |
|
"grad_norm": 7.495330333709717, |
|
"learning_rate": 1.9762559453032106e-05, |
|
"loss": 2.2048, |
|
"step": 325500 |
|
}, |
|
{ |
|
"epoch": 12.113555291319857, |
|
"grad_norm": 7.062661170959473, |
|
"learning_rate": 1.9716111771700357e-05, |
|
"loss": 2.2055, |
|
"step": 326000 |
|
}, |
|
{ |
|
"epoch": 12.132134363852556, |
|
"grad_norm": 7.220265865325928, |
|
"learning_rate": 1.9669664090368607e-05, |
|
"loss": 2.2333, |
|
"step": 326500 |
|
}, |
|
{ |
|
"epoch": 12.150713436385256, |
|
"grad_norm": 6.432553768157959, |
|
"learning_rate": 1.962321640903686e-05, |
|
"loss": 2.2274, |
|
"step": 327000 |
|
}, |
|
{ |
|
"epoch": 12.169292508917955, |
|
"grad_norm": 7.610962390899658, |
|
"learning_rate": 1.9576768727705115e-05, |
|
"loss": 2.2108, |
|
"step": 327500 |
|
}, |
|
{ |
|
"epoch": 12.187871581450654, |
|
"grad_norm": 8.169533729553223, |
|
"learning_rate": 1.9530321046373366e-05, |
|
"loss": 2.1948, |
|
"step": 328000 |
|
}, |
|
{ |
|
"epoch": 12.206450653983353, |
|
"grad_norm": 6.529592037200928, |
|
"learning_rate": 1.948387336504162e-05, |
|
"loss": 2.2195, |
|
"step": 328500 |
|
}, |
|
{ |
|
"epoch": 12.225029726516052, |
|
"grad_norm": 7.463806629180908, |
|
"learning_rate": 1.943742568370987e-05, |
|
"loss": 2.221, |
|
"step": 329000 |
|
}, |
|
{ |
|
"epoch": 12.24360879904875, |
|
"grad_norm": 7.339646816253662, |
|
"learning_rate": 1.939097800237812e-05, |
|
"loss": 2.2126, |
|
"step": 329500 |
|
}, |
|
{ |
|
"epoch": 12.262187871581451, |
|
"grad_norm": 7.518458366394043, |
|
"learning_rate": 1.9344530321046374e-05, |
|
"loss": 2.2102, |
|
"step": 330000 |
|
}, |
|
{ |
|
"epoch": 12.28076694411415, |
|
"grad_norm": 7.828365325927734, |
|
"learning_rate": 1.9298082639714625e-05, |
|
"loss": 2.2335, |
|
"step": 330500 |
|
}, |
|
{ |
|
"epoch": 12.29934601664685, |
|
"grad_norm": 7.198127269744873, |
|
"learning_rate": 1.925163495838288e-05, |
|
"loss": 2.2214, |
|
"step": 331000 |
|
}, |
|
{ |
|
"epoch": 12.317925089179548, |
|
"grad_norm": 6.6039533615112305, |
|
"learning_rate": 1.9205187277051133e-05, |
|
"loss": 2.1985, |
|
"step": 331500 |
|
}, |
|
{ |
|
"epoch": 12.336504161712247, |
|
"grad_norm": 7.200562477111816, |
|
"learning_rate": 1.9158739595719383e-05, |
|
"loss": 2.211, |
|
"step": 332000 |
|
}, |
|
{ |
|
"epoch": 12.355083234244946, |
|
"grad_norm": 7.252729892730713, |
|
"learning_rate": 1.9112291914387634e-05, |
|
"loss": 2.2302, |
|
"step": 332500 |
|
}, |
|
{ |
|
"epoch": 12.373662306777646, |
|
"grad_norm": 7.972862243652344, |
|
"learning_rate": 1.9065844233055888e-05, |
|
"loss": 2.2095, |
|
"step": 333000 |
|
}, |
|
{ |
|
"epoch": 12.392241379310345, |
|
"grad_norm": 8.594975471496582, |
|
"learning_rate": 1.9019396551724138e-05, |
|
"loss": 2.2204, |
|
"step": 333500 |
|
}, |
|
{ |
|
"epoch": 12.410820451843044, |
|
"grad_norm": 7.73285436630249, |
|
"learning_rate": 1.897294887039239e-05, |
|
"loss": 2.2174, |
|
"step": 334000 |
|
}, |
|
{ |
|
"epoch": 12.429399524375743, |
|
"grad_norm": 6.429736614227295, |
|
"learning_rate": 1.8926501189060646e-05, |
|
"loss": 2.2236, |
|
"step": 334500 |
|
}, |
|
{ |
|
"epoch": 12.447978596908442, |
|
"grad_norm": 6.68847131729126, |
|
"learning_rate": 1.8880053507728896e-05, |
|
"loss": 2.2293, |
|
"step": 335000 |
|
}, |
|
{ |
|
"epoch": 12.46655766944114, |
|
"grad_norm": 6.902133464813232, |
|
"learning_rate": 1.8833605826397147e-05, |
|
"loss": 2.215, |
|
"step": 335500 |
|
}, |
|
{ |
|
"epoch": 12.485136741973841, |
|
"grad_norm": 6.436554908752441, |
|
"learning_rate": 1.87871581450654e-05, |
|
"loss": 2.2269, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 12.50371581450654, |
|
"grad_norm": 6.80860710144043, |
|
"learning_rate": 1.874071046373365e-05, |
|
"loss": 2.2223, |
|
"step": 336500 |
|
}, |
|
{ |
|
"epoch": 12.522294887039239, |
|
"grad_norm": 7.977982044219971, |
|
"learning_rate": 1.86942627824019e-05, |
|
"loss": 2.2155, |
|
"step": 337000 |
|
}, |
|
{ |
|
"epoch": 12.540873959571938, |
|
"grad_norm": 7.9569478034973145, |
|
"learning_rate": 1.8647815101070156e-05, |
|
"loss": 2.2305, |
|
"step": 337500 |
|
}, |
|
{ |
|
"epoch": 12.559453032104637, |
|
"grad_norm": 6.445404529571533, |
|
"learning_rate": 1.860136741973841e-05, |
|
"loss": 2.2075, |
|
"step": 338000 |
|
}, |
|
{ |
|
"epoch": 12.578032104637337, |
|
"grad_norm": 7.153224468231201, |
|
"learning_rate": 1.855491973840666e-05, |
|
"loss": 2.2246, |
|
"step": 338500 |
|
}, |
|
{ |
|
"epoch": 12.596611177170036, |
|
"grad_norm": 7.287299633026123, |
|
"learning_rate": 1.850847205707491e-05, |
|
"loss": 2.1913, |
|
"step": 339000 |
|
}, |
|
{ |
|
"epoch": 12.615190249702735, |
|
"grad_norm": 6.6666107177734375, |
|
"learning_rate": 1.8462024375743164e-05, |
|
"loss": 2.2267, |
|
"step": 339500 |
|
}, |
|
{ |
|
"epoch": 12.633769322235434, |
|
"grad_norm": 7.024231433868408, |
|
"learning_rate": 1.8415576694411415e-05, |
|
"loss": 2.2106, |
|
"step": 340000 |
|
}, |
|
{ |
|
"epoch": 12.652348394768133, |
|
"grad_norm": 6.549313068389893, |
|
"learning_rate": 1.8369129013079665e-05, |
|
"loss": 2.2208, |
|
"step": 340500 |
|
}, |
|
{ |
|
"epoch": 12.670927467300832, |
|
"grad_norm": 6.641164302825928, |
|
"learning_rate": 1.832268133174792e-05, |
|
"loss": 2.2157, |
|
"step": 341000 |
|
}, |
|
{ |
|
"epoch": 12.689506539833532, |
|
"grad_norm": 7.615879535675049, |
|
"learning_rate": 1.8276233650416173e-05, |
|
"loss": 2.217, |
|
"step": 341500 |
|
}, |
|
{ |
|
"epoch": 12.708085612366231, |
|
"grad_norm": 7.870852470397949, |
|
"learning_rate": 1.8229785969084424e-05, |
|
"loss": 2.2254, |
|
"step": 342000 |
|
}, |
|
{ |
|
"epoch": 12.72666468489893, |
|
"grad_norm": 5.989630222320557, |
|
"learning_rate": 1.8183338287752677e-05, |
|
"loss": 2.2129, |
|
"step": 342500 |
|
}, |
|
{ |
|
"epoch": 12.745243757431629, |
|
"grad_norm": 6.8082594871521, |
|
"learning_rate": 1.8136890606420928e-05, |
|
"loss": 2.2229, |
|
"step": 343000 |
|
}, |
|
{ |
|
"epoch": 12.763822829964328, |
|
"grad_norm": 7.244877338409424, |
|
"learning_rate": 1.809044292508918e-05, |
|
"loss": 2.2127, |
|
"step": 343500 |
|
}, |
|
{ |
|
"epoch": 12.782401902497027, |
|
"grad_norm": 7.6857008934021, |
|
"learning_rate": 1.8043995243757432e-05, |
|
"loss": 2.2235, |
|
"step": 344000 |
|
}, |
|
{ |
|
"epoch": 12.800980975029727, |
|
"grad_norm": 7.00359582901001, |
|
"learning_rate": 1.7997547562425686e-05, |
|
"loss": 2.2211, |
|
"step": 344500 |
|
}, |
|
{ |
|
"epoch": 12.819560047562426, |
|
"grad_norm": 7.0071187019348145, |
|
"learning_rate": 1.7951099881093937e-05, |
|
"loss": 2.2182, |
|
"step": 345000 |
|
}, |
|
{ |
|
"epoch": 12.838139120095125, |
|
"grad_norm": 6.9319634437561035, |
|
"learning_rate": 1.790465219976219e-05, |
|
"loss": 2.2043, |
|
"step": 345500 |
|
}, |
|
{ |
|
"epoch": 12.856718192627824, |
|
"grad_norm": 6.487482070922852, |
|
"learning_rate": 1.785820451843044e-05, |
|
"loss": 2.2139, |
|
"step": 346000 |
|
}, |
|
{ |
|
"epoch": 12.875297265160523, |
|
"grad_norm": 7.508727550506592, |
|
"learning_rate": 1.781175683709869e-05, |
|
"loss": 2.2243, |
|
"step": 346500 |
|
}, |
|
{ |
|
"epoch": 12.893876337693222, |
|
"grad_norm": 6.555574893951416, |
|
"learning_rate": 1.7765309155766945e-05, |
|
"loss": 2.2167, |
|
"step": 347000 |
|
}, |
|
{ |
|
"epoch": 12.912455410225922, |
|
"grad_norm": 7.410988807678223, |
|
"learning_rate": 1.7718861474435196e-05, |
|
"loss": 2.2158, |
|
"step": 347500 |
|
}, |
|
{ |
|
"epoch": 12.931034482758621, |
|
"grad_norm": 8.217428207397461, |
|
"learning_rate": 1.767241379310345e-05, |
|
"loss": 2.2031, |
|
"step": 348000 |
|
}, |
|
{ |
|
"epoch": 12.94961355529132, |
|
"grad_norm": 6.6040754318237305, |
|
"learning_rate": 1.7625966111771704e-05, |
|
"loss": 2.1866, |
|
"step": 348500 |
|
}, |
|
{ |
|
"epoch": 12.968192627824019, |
|
"grad_norm": 6.99837064743042, |
|
"learning_rate": 1.7579518430439954e-05, |
|
"loss": 2.1899, |
|
"step": 349000 |
|
}, |
|
{ |
|
"epoch": 12.986771700356718, |
|
"grad_norm": 6.531412124633789, |
|
"learning_rate": 1.7533070749108205e-05, |
|
"loss": 2.2244, |
|
"step": 349500 |
|
}, |
|
{ |
|
"epoch": 13.005350772889418, |
|
"grad_norm": 7.704728126525879, |
|
"learning_rate": 1.748662306777646e-05, |
|
"loss": 2.18, |
|
"step": 350000 |
|
}, |
|
{ |
|
"epoch": 13.023929845422117, |
|
"grad_norm": 6.77532434463501, |
|
"learning_rate": 1.744017538644471e-05, |
|
"loss": 2.1789, |
|
"step": 350500 |
|
}, |
|
{ |
|
"epoch": 13.042508917954816, |
|
"grad_norm": 6.446128845214844, |
|
"learning_rate": 1.739372770511296e-05, |
|
"loss": 2.1707, |
|
"step": 351000 |
|
}, |
|
{ |
|
"epoch": 13.061087990487515, |
|
"grad_norm": 7.576733589172363, |
|
"learning_rate": 1.7347280023781213e-05, |
|
"loss": 2.1768, |
|
"step": 351500 |
|
}, |
|
{ |
|
"epoch": 13.079667063020214, |
|
"grad_norm": 7.239291191101074, |
|
"learning_rate": 1.7300832342449467e-05, |
|
"loss": 2.2011, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 13.098246135552913, |
|
"grad_norm": 6.936691761016846, |
|
"learning_rate": 1.7254384661117718e-05, |
|
"loss": 2.1767, |
|
"step": 352500 |
|
}, |
|
{ |
|
"epoch": 13.116825208085613, |
|
"grad_norm": 7.205715179443359, |
|
"learning_rate": 1.7207936979785968e-05, |
|
"loss": 2.1581, |
|
"step": 353000 |
|
}, |
|
{ |
|
"epoch": 13.135404280618312, |
|
"grad_norm": 6.61326789855957, |
|
"learning_rate": 1.7161489298454222e-05, |
|
"loss": 2.185, |
|
"step": 353500 |
|
}, |
|
{ |
|
"epoch": 13.15398335315101, |
|
"grad_norm": 7.715660572052002, |
|
"learning_rate": 1.7115041617122473e-05, |
|
"loss": 2.1924, |
|
"step": 354000 |
|
}, |
|
{ |
|
"epoch": 13.17256242568371, |
|
"grad_norm": 6.543544769287109, |
|
"learning_rate": 1.7068593935790726e-05, |
|
"loss": 2.1998, |
|
"step": 354500 |
|
}, |
|
{ |
|
"epoch": 13.191141498216409, |
|
"grad_norm": 8.281086921691895, |
|
"learning_rate": 1.702214625445898e-05, |
|
"loss": 2.1787, |
|
"step": 355000 |
|
}, |
|
{ |
|
"epoch": 13.209720570749107, |
|
"grad_norm": 6.323915481567383, |
|
"learning_rate": 1.697569857312723e-05, |
|
"loss": 2.1834, |
|
"step": 355500 |
|
}, |
|
{ |
|
"epoch": 13.228299643281808, |
|
"grad_norm": 8.45340347290039, |
|
"learning_rate": 1.692925089179548e-05, |
|
"loss": 2.1806, |
|
"step": 356000 |
|
}, |
|
{ |
|
"epoch": 13.246878715814507, |
|
"grad_norm": 8.1563720703125, |
|
"learning_rate": 1.6882803210463735e-05, |
|
"loss": 2.1708, |
|
"step": 356500 |
|
}, |
|
{ |
|
"epoch": 13.265457788347206, |
|
"grad_norm": 7.083395481109619, |
|
"learning_rate": 1.6836355529131986e-05, |
|
"loss": 2.1866, |
|
"step": 357000 |
|
}, |
|
{ |
|
"epoch": 13.284036860879905, |
|
"grad_norm": 6.55299186706543, |
|
"learning_rate": 1.6789907847800236e-05, |
|
"loss": 2.1723, |
|
"step": 357500 |
|
}, |
|
{ |
|
"epoch": 13.302615933412604, |
|
"grad_norm": 6.710261821746826, |
|
"learning_rate": 1.674346016646849e-05, |
|
"loss": 2.1977, |
|
"step": 358000 |
|
}, |
|
{ |
|
"epoch": 13.321195005945302, |
|
"grad_norm": 7.0249738693237305, |
|
"learning_rate": 1.6697012485136744e-05, |
|
"loss": 2.2007, |
|
"step": 358500 |
|
}, |
|
{ |
|
"epoch": 13.339774078478003, |
|
"grad_norm": 7.835285663604736, |
|
"learning_rate": 1.6650564803804994e-05, |
|
"loss": 2.1959, |
|
"step": 359000 |
|
}, |
|
{ |
|
"epoch": 13.358353151010702, |
|
"grad_norm": 8.400995254516602, |
|
"learning_rate": 1.660411712247325e-05, |
|
"loss": 2.1991, |
|
"step": 359500 |
|
}, |
|
{ |
|
"epoch": 13.3769322235434, |
|
"grad_norm": 6.235854148864746, |
|
"learning_rate": 1.65576694411415e-05, |
|
"loss": 2.174, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 13.3955112960761, |
|
"grad_norm": 6.741766929626465, |
|
"learning_rate": 1.651122175980975e-05, |
|
"loss": 2.1777, |
|
"step": 360500 |
|
}, |
|
{ |
|
"epoch": 13.414090368608798, |
|
"grad_norm": 8.243950843811035, |
|
"learning_rate": 1.6464774078478003e-05, |
|
"loss": 2.1841, |
|
"step": 361000 |
|
}, |
|
{ |
|
"epoch": 13.432669441141499, |
|
"grad_norm": 6.43676233291626, |
|
"learning_rate": 1.6418326397146254e-05, |
|
"loss": 2.1769, |
|
"step": 361500 |
|
}, |
|
{ |
|
"epoch": 13.451248513674198, |
|
"grad_norm": 6.800743579864502, |
|
"learning_rate": 1.6371878715814508e-05, |
|
"loss": 2.2043, |
|
"step": 362000 |
|
}, |
|
{ |
|
"epoch": 13.469827586206897, |
|
"grad_norm": 6.082602500915527, |
|
"learning_rate": 1.632543103448276e-05, |
|
"loss": 2.167, |
|
"step": 362500 |
|
}, |
|
{ |
|
"epoch": 13.488406658739596, |
|
"grad_norm": 7.768115520477295, |
|
"learning_rate": 1.6278983353151012e-05, |
|
"loss": 2.1623, |
|
"step": 363000 |
|
}, |
|
{ |
|
"epoch": 13.506985731272295, |
|
"grad_norm": 6.893867492675781, |
|
"learning_rate": 1.6232535671819262e-05, |
|
"loss": 2.1835, |
|
"step": 363500 |
|
}, |
|
{ |
|
"epoch": 13.525564803804993, |
|
"grad_norm": 6.749509811401367, |
|
"learning_rate": 1.6186087990487516e-05, |
|
"loss": 2.1659, |
|
"step": 364000 |
|
}, |
|
{ |
|
"epoch": 13.544143876337694, |
|
"grad_norm": 6.05668306350708, |
|
"learning_rate": 1.6139640309155767e-05, |
|
"loss": 2.1703, |
|
"step": 364500 |
|
}, |
|
{ |
|
"epoch": 13.562722948870393, |
|
"grad_norm": 7.0912251472473145, |
|
"learning_rate": 1.609319262782402e-05, |
|
"loss": 2.1919, |
|
"step": 365000 |
|
}, |
|
{ |
|
"epoch": 13.581302021403092, |
|
"grad_norm": 6.6050310134887695, |
|
"learning_rate": 1.6046744946492275e-05, |
|
"loss": 2.1756, |
|
"step": 365500 |
|
}, |
|
{ |
|
"epoch": 13.59988109393579, |
|
"grad_norm": 6.950946807861328, |
|
"learning_rate": 1.6000297265160525e-05, |
|
"loss": 2.1825, |
|
"step": 366000 |
|
}, |
|
{ |
|
"epoch": 13.61846016646849, |
|
"grad_norm": 7.240453243255615, |
|
"learning_rate": 1.5953849583828776e-05, |
|
"loss": 2.1837, |
|
"step": 366500 |
|
}, |
|
{ |
|
"epoch": 13.637039239001188, |
|
"grad_norm": 8.0787935256958, |
|
"learning_rate": 1.590740190249703e-05, |
|
"loss": 2.1747, |
|
"step": 367000 |
|
}, |
|
{ |
|
"epoch": 13.655618311533889, |
|
"grad_norm": 6.953646659851074, |
|
"learning_rate": 1.586095422116528e-05, |
|
"loss": 2.1821, |
|
"step": 367500 |
|
}, |
|
{ |
|
"epoch": 13.674197384066588, |
|
"grad_norm": 6.981358051300049, |
|
"learning_rate": 1.581450653983353e-05, |
|
"loss": 2.1751, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 13.692776456599287, |
|
"grad_norm": 7.580711841583252, |
|
"learning_rate": 1.5768058858501784e-05, |
|
"loss": 2.1685, |
|
"step": 368500 |
|
}, |
|
{ |
|
"epoch": 13.711355529131986, |
|
"grad_norm": 7.360109806060791, |
|
"learning_rate": 1.5721611177170038e-05, |
|
"loss": 2.1566, |
|
"step": 369000 |
|
}, |
|
{ |
|
"epoch": 13.729934601664684, |
|
"grad_norm": 6.589022636413574, |
|
"learning_rate": 1.567516349583829e-05, |
|
"loss": 2.1725, |
|
"step": 369500 |
|
}, |
|
{ |
|
"epoch": 13.748513674197383, |
|
"grad_norm": 7.376802444458008, |
|
"learning_rate": 1.562871581450654e-05, |
|
"loss": 2.1814, |
|
"step": 370000 |
|
}, |
|
{ |
|
"epoch": 13.767092746730084, |
|
"grad_norm": 7.36546516418457, |
|
"learning_rate": 1.5582268133174793e-05, |
|
"loss": 2.1824, |
|
"step": 370500 |
|
}, |
|
{ |
|
"epoch": 13.785671819262783, |
|
"grad_norm": 7.832765579223633, |
|
"learning_rate": 1.5535820451843044e-05, |
|
"loss": 2.1651, |
|
"step": 371000 |
|
}, |
|
{ |
|
"epoch": 13.804250891795482, |
|
"grad_norm": 7.414605617523193, |
|
"learning_rate": 1.5489372770511294e-05, |
|
"loss": 2.1488, |
|
"step": 371500 |
|
}, |
|
{ |
|
"epoch": 13.82282996432818, |
|
"grad_norm": 7.148501873016357, |
|
"learning_rate": 1.5442925089179548e-05, |
|
"loss": 2.185, |
|
"step": 372000 |
|
}, |
|
{ |
|
"epoch": 13.84140903686088, |
|
"grad_norm": 6.733073711395264, |
|
"learning_rate": 1.5396477407847802e-05, |
|
"loss": 2.1645, |
|
"step": 372500 |
|
}, |
|
{ |
|
"epoch": 13.85998810939358, |
|
"grad_norm": 7.812681198120117, |
|
"learning_rate": 1.5350029726516052e-05, |
|
"loss": 2.1836, |
|
"step": 373000 |
|
}, |
|
{ |
|
"epoch": 13.878567181926279, |
|
"grad_norm": 6.853206634521484, |
|
"learning_rate": 1.5303582045184306e-05, |
|
"loss": 2.1778, |
|
"step": 373500 |
|
}, |
|
{ |
|
"epoch": 13.897146254458978, |
|
"grad_norm": 7.234543323516846, |
|
"learning_rate": 1.5257134363852557e-05, |
|
"loss": 2.1759, |
|
"step": 374000 |
|
}, |
|
{ |
|
"epoch": 13.915725326991677, |
|
"grad_norm": 7.433253765106201, |
|
"learning_rate": 1.5210686682520809e-05, |
|
"loss": 2.1722, |
|
"step": 374500 |
|
}, |
|
{ |
|
"epoch": 13.934304399524375, |
|
"grad_norm": 7.073111534118652, |
|
"learning_rate": 1.5164239001189063e-05, |
|
"loss": 2.1855, |
|
"step": 375000 |
|
}, |
|
{ |
|
"epoch": 13.952883472057074, |
|
"grad_norm": 7.280003547668457, |
|
"learning_rate": 1.5117791319857313e-05, |
|
"loss": 2.1819, |
|
"step": 375500 |
|
}, |
|
{ |
|
"epoch": 13.971462544589775, |
|
"grad_norm": 6.7823991775512695, |
|
"learning_rate": 1.5071343638525565e-05, |
|
"loss": 2.1585, |
|
"step": 376000 |
|
}, |
|
{ |
|
"epoch": 13.990041617122474, |
|
"grad_norm": 7.181284427642822, |
|
"learning_rate": 1.502489595719382e-05, |
|
"loss": 2.1689, |
|
"step": 376500 |
|
}, |
|
{ |
|
"epoch": 14.008620689655173, |
|
"grad_norm": 6.957113265991211, |
|
"learning_rate": 1.497844827586207e-05, |
|
"loss": 2.1754, |
|
"step": 377000 |
|
}, |
|
{ |
|
"epoch": 14.027199762187871, |
|
"grad_norm": 7.111293315887451, |
|
"learning_rate": 1.493200059453032e-05, |
|
"loss": 2.1651, |
|
"step": 377500 |
|
}, |
|
{ |
|
"epoch": 14.04577883472057, |
|
"grad_norm": 7.025313854217529, |
|
"learning_rate": 1.4885552913198574e-05, |
|
"loss": 2.1458, |
|
"step": 378000 |
|
}, |
|
{ |
|
"epoch": 14.06435790725327, |
|
"grad_norm": 6.963667869567871, |
|
"learning_rate": 1.4839105231866826e-05, |
|
"loss": 2.1456, |
|
"step": 378500 |
|
}, |
|
{ |
|
"epoch": 14.08293697978597, |
|
"grad_norm": 7.611172199249268, |
|
"learning_rate": 1.4792657550535077e-05, |
|
"loss": 2.158, |
|
"step": 379000 |
|
}, |
|
{ |
|
"epoch": 14.101516052318669, |
|
"grad_norm": 6.874037265777588, |
|
"learning_rate": 1.474620986920333e-05, |
|
"loss": 2.1446, |
|
"step": 379500 |
|
}, |
|
{ |
|
"epoch": 14.120095124851368, |
|
"grad_norm": 7.512300491333008, |
|
"learning_rate": 1.4699762187871583e-05, |
|
"loss": 2.1528, |
|
"step": 380000 |
|
}, |
|
{ |
|
"epoch": 14.138674197384066, |
|
"grad_norm": 6.693312168121338, |
|
"learning_rate": 1.4653314506539833e-05, |
|
"loss": 2.1563, |
|
"step": 380500 |
|
}, |
|
{ |
|
"epoch": 14.157253269916765, |
|
"grad_norm": 6.6438164710998535, |
|
"learning_rate": 1.4606866825208087e-05, |
|
"loss": 2.1383, |
|
"step": 381000 |
|
}, |
|
{ |
|
"epoch": 14.175832342449464, |
|
"grad_norm": 7.537757873535156, |
|
"learning_rate": 1.456041914387634e-05, |
|
"loss": 2.1554, |
|
"step": 381500 |
|
}, |
|
{ |
|
"epoch": 14.194411414982165, |
|
"grad_norm": 8.159100532531738, |
|
"learning_rate": 1.451397146254459e-05, |
|
"loss": 2.1538, |
|
"step": 382000 |
|
}, |
|
{ |
|
"epoch": 14.212990487514864, |
|
"grad_norm": 7.427910327911377, |
|
"learning_rate": 1.4467523781212844e-05, |
|
"loss": 2.1623, |
|
"step": 382500 |
|
}, |
|
{ |
|
"epoch": 14.231569560047562, |
|
"grad_norm": 7.805336952209473, |
|
"learning_rate": 1.4421076099881094e-05, |
|
"loss": 2.1645, |
|
"step": 383000 |
|
}, |
|
{ |
|
"epoch": 14.250148632580261, |
|
"grad_norm": 6.669980525970459, |
|
"learning_rate": 1.4374628418549347e-05, |
|
"loss": 2.1525, |
|
"step": 383500 |
|
}, |
|
{ |
|
"epoch": 14.26872770511296, |
|
"grad_norm": 7.358639240264893, |
|
"learning_rate": 1.4328180737217597e-05, |
|
"loss": 2.1542, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 14.28730677764566, |
|
"grad_norm": 7.103815078735352, |
|
"learning_rate": 1.4281733055885851e-05, |
|
"loss": 2.1567, |
|
"step": 384500 |
|
}, |
|
{ |
|
"epoch": 14.30588585017836, |
|
"grad_norm": 7.218321800231934, |
|
"learning_rate": 1.4235285374554103e-05, |
|
"loss": 2.1569, |
|
"step": 385000 |
|
}, |
|
{ |
|
"epoch": 14.324464922711059, |
|
"grad_norm": 7.941781520843506, |
|
"learning_rate": 1.4188837693222354e-05, |
|
"loss": 2.1554, |
|
"step": 385500 |
|
}, |
|
{ |
|
"epoch": 14.343043995243757, |
|
"grad_norm": 8.86156940460205, |
|
"learning_rate": 1.4142390011890607e-05, |
|
"loss": 2.1649, |
|
"step": 386000 |
|
}, |
|
{ |
|
"epoch": 14.361623067776456, |
|
"grad_norm": 6.904116153717041, |
|
"learning_rate": 1.409594233055886e-05, |
|
"loss": 2.1486, |
|
"step": 386500 |
|
}, |
|
{ |
|
"epoch": 14.380202140309155, |
|
"grad_norm": 6.8697943687438965, |
|
"learning_rate": 1.404949464922711e-05, |
|
"loss": 2.1686, |
|
"step": 387000 |
|
}, |
|
{ |
|
"epoch": 14.398781212841856, |
|
"grad_norm": 7.536423683166504, |
|
"learning_rate": 1.4003046967895364e-05, |
|
"loss": 2.1534, |
|
"step": 387500 |
|
}, |
|
{ |
|
"epoch": 14.417360285374555, |
|
"grad_norm": 6.2832465171813965, |
|
"learning_rate": 1.3956599286563615e-05, |
|
"loss": 2.1638, |
|
"step": 388000 |
|
}, |
|
{ |
|
"epoch": 14.435939357907253, |
|
"grad_norm": 7.5254926681518555, |
|
"learning_rate": 1.3910151605231867e-05, |
|
"loss": 2.1611, |
|
"step": 388500 |
|
}, |
|
{ |
|
"epoch": 14.454518430439952, |
|
"grad_norm": 6.102006912231445, |
|
"learning_rate": 1.386370392390012e-05, |
|
"loss": 2.1481, |
|
"step": 389000 |
|
}, |
|
{ |
|
"epoch": 14.473097502972651, |
|
"grad_norm": 6.829434871673584, |
|
"learning_rate": 1.3817256242568371e-05, |
|
"loss": 2.1309, |
|
"step": 389500 |
|
}, |
|
{ |
|
"epoch": 14.49167657550535, |
|
"grad_norm": 7.072176456451416, |
|
"learning_rate": 1.3770808561236623e-05, |
|
"loss": 2.1493, |
|
"step": 390000 |
|
}, |
|
{ |
|
"epoch": 14.51025564803805, |
|
"grad_norm": 8.32613754272461, |
|
"learning_rate": 1.3724360879904877e-05, |
|
"loss": 2.128, |
|
"step": 390500 |
|
}, |
|
{ |
|
"epoch": 14.52883472057075, |
|
"grad_norm": 7.587469577789307, |
|
"learning_rate": 1.3677913198573128e-05, |
|
"loss": 2.1446, |
|
"step": 391000 |
|
}, |
|
{ |
|
"epoch": 14.547413793103448, |
|
"grad_norm": 7.003942966461182, |
|
"learning_rate": 1.363146551724138e-05, |
|
"loss": 2.1493, |
|
"step": 391500 |
|
}, |
|
{ |
|
"epoch": 14.565992865636147, |
|
"grad_norm": 6.587801456451416, |
|
"learning_rate": 1.3585017835909634e-05, |
|
"loss": 2.1554, |
|
"step": 392000 |
|
}, |
|
{ |
|
"epoch": 14.584571938168846, |
|
"grad_norm": 6.796844005584717, |
|
"learning_rate": 1.3538570154577884e-05, |
|
"loss": 2.1699, |
|
"step": 392500 |
|
}, |
|
{ |
|
"epoch": 14.603151010701545, |
|
"grad_norm": 6.230968952178955, |
|
"learning_rate": 1.3492122473246135e-05, |
|
"loss": 2.1514, |
|
"step": 393000 |
|
}, |
|
{ |
|
"epoch": 14.621730083234246, |
|
"grad_norm": 7.986715793609619, |
|
"learning_rate": 1.3445674791914389e-05, |
|
"loss": 2.1439, |
|
"step": 393500 |
|
}, |
|
{ |
|
"epoch": 14.640309155766944, |
|
"grad_norm": 6.953087329864502, |
|
"learning_rate": 1.339922711058264e-05, |
|
"loss": 2.1359, |
|
"step": 394000 |
|
}, |
|
{ |
|
"epoch": 14.658888228299643, |
|
"grad_norm": 6.939476490020752, |
|
"learning_rate": 1.3352779429250891e-05, |
|
"loss": 2.1438, |
|
"step": 394500 |
|
}, |
|
{ |
|
"epoch": 14.677467300832342, |
|
"grad_norm": 7.4189229011535645, |
|
"learning_rate": 1.3306331747919145e-05, |
|
"loss": 2.1494, |
|
"step": 395000 |
|
}, |
|
{ |
|
"epoch": 14.696046373365041, |
|
"grad_norm": 6.914766788482666, |
|
"learning_rate": 1.3259884066587397e-05, |
|
"loss": 2.1336, |
|
"step": 395500 |
|
}, |
|
{ |
|
"epoch": 14.714625445897742, |
|
"grad_norm": 6.602614402770996, |
|
"learning_rate": 1.3213436385255648e-05, |
|
"loss": 2.154, |
|
"step": 396000 |
|
}, |
|
{ |
|
"epoch": 14.73320451843044, |
|
"grad_norm": 7.446470260620117, |
|
"learning_rate": 1.3166988703923902e-05, |
|
"loss": 2.1425, |
|
"step": 396500 |
|
}, |
|
{ |
|
"epoch": 14.75178359096314, |
|
"grad_norm": 6.55057430267334, |
|
"learning_rate": 1.3120541022592154e-05, |
|
"loss": 2.1403, |
|
"step": 397000 |
|
}, |
|
{ |
|
"epoch": 14.770362663495838, |
|
"grad_norm": 6.798906326293945, |
|
"learning_rate": 1.3074093341260404e-05, |
|
"loss": 2.1396, |
|
"step": 397500 |
|
}, |
|
{ |
|
"epoch": 14.788941736028537, |
|
"grad_norm": 7.93524169921875, |
|
"learning_rate": 1.3027645659928655e-05, |
|
"loss": 2.1665, |
|
"step": 398000 |
|
}, |
|
{ |
|
"epoch": 14.807520808561236, |
|
"grad_norm": 8.041824340820312, |
|
"learning_rate": 1.2981197978596909e-05, |
|
"loss": 2.1377, |
|
"step": 398500 |
|
}, |
|
{ |
|
"epoch": 14.826099881093937, |
|
"grad_norm": 6.651689529418945, |
|
"learning_rate": 1.2934750297265161e-05, |
|
"loss": 2.1461, |
|
"step": 399000 |
|
}, |
|
{ |
|
"epoch": 14.844678953626635, |
|
"grad_norm": 6.821606636047363, |
|
"learning_rate": 1.2888302615933411e-05, |
|
"loss": 2.158, |
|
"step": 399500 |
|
}, |
|
{ |
|
"epoch": 14.863258026159334, |
|
"grad_norm": 8.040721893310547, |
|
"learning_rate": 1.2841854934601665e-05, |
|
"loss": 2.1466, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 14.881837098692033, |
|
"grad_norm": 7.286508083343506, |
|
"learning_rate": 1.2795407253269918e-05, |
|
"loss": 2.1428, |
|
"step": 400500 |
|
}, |
|
{ |
|
"epoch": 14.900416171224732, |
|
"grad_norm": 8.08362102508545, |
|
"learning_rate": 1.2748959571938168e-05, |
|
"loss": 2.1515, |
|
"step": 401000 |
|
}, |
|
{ |
|
"epoch": 14.918995243757431, |
|
"grad_norm": 8.438191413879395, |
|
"learning_rate": 1.2702511890606422e-05, |
|
"loss": 2.1446, |
|
"step": 401500 |
|
}, |
|
{ |
|
"epoch": 14.937574316290132, |
|
"grad_norm": 7.372959136962891, |
|
"learning_rate": 1.2656064209274674e-05, |
|
"loss": 2.1496, |
|
"step": 402000 |
|
}, |
|
{ |
|
"epoch": 14.95615338882283, |
|
"grad_norm": 7.080979347229004, |
|
"learning_rate": 1.2609616527942925e-05, |
|
"loss": 2.1579, |
|
"step": 402500 |
|
}, |
|
{ |
|
"epoch": 14.97473246135553, |
|
"grad_norm": 7.254255294799805, |
|
"learning_rate": 1.2563168846611178e-05, |
|
"loss": 2.1414, |
|
"step": 403000 |
|
}, |
|
{ |
|
"epoch": 14.993311533888228, |
|
"grad_norm": 7.761992931365967, |
|
"learning_rate": 1.2516721165279429e-05, |
|
"loss": 2.15, |
|
"step": 403500 |
|
}, |
|
{ |
|
"epoch": 15.011890606420927, |
|
"grad_norm": 7.0644049644470215, |
|
"learning_rate": 1.2470273483947683e-05, |
|
"loss": 2.1474, |
|
"step": 404000 |
|
}, |
|
{ |
|
"epoch": 15.030469678953626, |
|
"grad_norm": 8.067272186279297, |
|
"learning_rate": 1.2423825802615933e-05, |
|
"loss": 2.1103, |
|
"step": 404500 |
|
}, |
|
{ |
|
"epoch": 15.049048751486326, |
|
"grad_norm": 6.896698474884033, |
|
"learning_rate": 1.2377378121284185e-05, |
|
"loss": 2.1159, |
|
"step": 405000 |
|
}, |
|
{ |
|
"epoch": 15.067627824019025, |
|
"grad_norm": 6.983173370361328, |
|
"learning_rate": 1.233093043995244e-05, |
|
"loss": 2.1463, |
|
"step": 405500 |
|
}, |
|
{ |
|
"epoch": 15.086206896551724, |
|
"grad_norm": 8.10067367553711, |
|
"learning_rate": 1.228448275862069e-05, |
|
"loss": 2.1418, |
|
"step": 406000 |
|
}, |
|
{ |
|
"epoch": 15.104785969084423, |
|
"grad_norm": 7.817485332489014, |
|
"learning_rate": 1.2238035077288942e-05, |
|
"loss": 2.1433, |
|
"step": 406500 |
|
}, |
|
{ |
|
"epoch": 15.123365041617122, |
|
"grad_norm": 7.6188578605651855, |
|
"learning_rate": 1.2191587395957194e-05, |
|
"loss": 2.1198, |
|
"step": 407000 |
|
}, |
|
{ |
|
"epoch": 15.14194411414982, |
|
"grad_norm": 7.024149417877197, |
|
"learning_rate": 1.2145139714625446e-05, |
|
"loss": 2.1157, |
|
"step": 407500 |
|
}, |
|
{ |
|
"epoch": 15.160523186682521, |
|
"grad_norm": 6.95907735824585, |
|
"learning_rate": 1.2098692033293699e-05, |
|
"loss": 2.134, |
|
"step": 408000 |
|
}, |
|
{ |
|
"epoch": 15.17910225921522, |
|
"grad_norm": 6.850398540496826, |
|
"learning_rate": 1.205224435196195e-05, |
|
"loss": 2.1178, |
|
"step": 408500 |
|
}, |
|
{ |
|
"epoch": 15.19768133174792, |
|
"grad_norm": 7.054015159606934, |
|
"learning_rate": 1.2005796670630203e-05, |
|
"loss": 2.1353, |
|
"step": 409000 |
|
}, |
|
{ |
|
"epoch": 15.216260404280618, |
|
"grad_norm": 8.049177169799805, |
|
"learning_rate": 1.1959348989298455e-05, |
|
"loss": 2.1175, |
|
"step": 409500 |
|
}, |
|
{ |
|
"epoch": 15.234839476813317, |
|
"grad_norm": 7.3112568855285645, |
|
"learning_rate": 1.1912901307966706e-05, |
|
"loss": 2.1269, |
|
"step": 410000 |
|
}, |
|
{ |
|
"epoch": 15.253418549346017, |
|
"grad_norm": 7.102066516876221, |
|
"learning_rate": 1.186645362663496e-05, |
|
"loss": 2.121, |
|
"step": 410500 |
|
}, |
|
{ |
|
"epoch": 15.271997621878716, |
|
"grad_norm": 7.103978633880615, |
|
"learning_rate": 1.1820005945303212e-05, |
|
"loss": 2.123, |
|
"step": 411000 |
|
}, |
|
{ |
|
"epoch": 15.290576694411415, |
|
"grad_norm": 7.16837215423584, |
|
"learning_rate": 1.1773558263971462e-05, |
|
"loss": 2.1555, |
|
"step": 411500 |
|
}, |
|
{ |
|
"epoch": 15.309155766944114, |
|
"grad_norm": 7.387100696563721, |
|
"learning_rate": 1.1727110582639714e-05, |
|
"loss": 2.1323, |
|
"step": 412000 |
|
}, |
|
{ |
|
"epoch": 15.327734839476813, |
|
"grad_norm": 7.893144607543945, |
|
"learning_rate": 1.1680662901307968e-05, |
|
"loss": 2.1358, |
|
"step": 412500 |
|
}, |
|
{ |
|
"epoch": 15.346313912009512, |
|
"grad_norm": 7.737049579620361, |
|
"learning_rate": 1.1634215219976219e-05, |
|
"loss": 2.122, |
|
"step": 413000 |
|
}, |
|
{ |
|
"epoch": 15.364892984542212, |
|
"grad_norm": 7.758161544799805, |
|
"learning_rate": 1.1587767538644471e-05, |
|
"loss": 2.1262, |
|
"step": 413500 |
|
}, |
|
{ |
|
"epoch": 15.383472057074911, |
|
"grad_norm": 7.8588666915893555, |
|
"learning_rate": 1.1541319857312725e-05, |
|
"loss": 2.1202, |
|
"step": 414000 |
|
}, |
|
{ |
|
"epoch": 15.40205112960761, |
|
"grad_norm": 7.353470325469971, |
|
"learning_rate": 1.1494872175980975e-05, |
|
"loss": 2.1406, |
|
"step": 414500 |
|
}, |
|
{ |
|
"epoch": 15.420630202140309, |
|
"grad_norm": 6.766369819641113, |
|
"learning_rate": 1.1448424494649228e-05, |
|
"loss": 2.1344, |
|
"step": 415000 |
|
}, |
|
{ |
|
"epoch": 15.439209274673008, |
|
"grad_norm": 7.156630992889404, |
|
"learning_rate": 1.140197681331748e-05, |
|
"loss": 2.1232, |
|
"step": 415500 |
|
}, |
|
{ |
|
"epoch": 15.457788347205707, |
|
"grad_norm": 7.754790782928467, |
|
"learning_rate": 1.1355529131985732e-05, |
|
"loss": 2.1154, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 15.476367419738407, |
|
"grad_norm": 8.716788291931152, |
|
"learning_rate": 1.1309081450653984e-05, |
|
"loss": 2.1236, |
|
"step": 416500 |
|
}, |
|
{ |
|
"epoch": 15.494946492271106, |
|
"grad_norm": 7.345715522766113, |
|
"learning_rate": 1.1262633769322235e-05, |
|
"loss": 2.1299, |
|
"step": 417000 |
|
}, |
|
{ |
|
"epoch": 15.513525564803805, |
|
"grad_norm": 7.088531494140625, |
|
"learning_rate": 1.1216186087990488e-05, |
|
"loss": 2.1168, |
|
"step": 417500 |
|
}, |
|
{ |
|
"epoch": 15.532104637336504, |
|
"grad_norm": 7.417008876800537, |
|
"learning_rate": 1.116973840665874e-05, |
|
"loss": 2.1247, |
|
"step": 418000 |
|
}, |
|
{ |
|
"epoch": 15.550683709869203, |
|
"grad_norm": 7.3177995681762695, |
|
"learning_rate": 1.1123290725326991e-05, |
|
"loss": 2.1198, |
|
"step": 418500 |
|
}, |
|
{ |
|
"epoch": 15.569262782401903, |
|
"grad_norm": 6.9706711769104, |
|
"learning_rate": 1.1076843043995245e-05, |
|
"loss": 2.1228, |
|
"step": 419000 |
|
}, |
|
{ |
|
"epoch": 15.587841854934602, |
|
"grad_norm": 6.97265625, |
|
"learning_rate": 1.1030395362663497e-05, |
|
"loss": 2.1237, |
|
"step": 419500 |
|
}, |
|
{ |
|
"epoch": 15.606420927467301, |
|
"grad_norm": 6.226667404174805, |
|
"learning_rate": 1.0983947681331748e-05, |
|
"loss": 2.1017, |
|
"step": 420000 |
|
}, |
|
{ |
|
"epoch": 15.625, |
|
"grad_norm": 7.427140712738037, |
|
"learning_rate": 1.09375e-05, |
|
"loss": 2.1131, |
|
"step": 420500 |
|
}, |
|
{ |
|
"epoch": 15.643579072532699, |
|
"grad_norm": 8.942204475402832, |
|
"learning_rate": 1.0891052318668254e-05, |
|
"loss": 2.1294, |
|
"step": 421000 |
|
}, |
|
{ |
|
"epoch": 15.662158145065398, |
|
"grad_norm": 7.123710632324219, |
|
"learning_rate": 1.0844604637336504e-05, |
|
"loss": 2.1207, |
|
"step": 421500 |
|
}, |
|
{ |
|
"epoch": 15.680737217598097, |
|
"grad_norm": 6.2210798263549805, |
|
"learning_rate": 1.0798156956004756e-05, |
|
"loss": 2.1222, |
|
"step": 422000 |
|
}, |
|
{ |
|
"epoch": 15.699316290130797, |
|
"grad_norm": 7.38429069519043, |
|
"learning_rate": 1.0751709274673009e-05, |
|
"loss": 2.114, |
|
"step": 422500 |
|
}, |
|
{ |
|
"epoch": 15.717895362663496, |
|
"grad_norm": 6.752946853637695, |
|
"learning_rate": 1.070526159334126e-05, |
|
"loss": 2.1105, |
|
"step": 423000 |
|
}, |
|
{ |
|
"epoch": 15.736474435196195, |
|
"grad_norm": 6.8533406257629395, |
|
"learning_rate": 1.0658813912009513e-05, |
|
"loss": 2.1263, |
|
"step": 423500 |
|
}, |
|
{ |
|
"epoch": 15.755053507728894, |
|
"grad_norm": 8.36920166015625, |
|
"learning_rate": 1.0612366230677765e-05, |
|
"loss": 2.1082, |
|
"step": 424000 |
|
}, |
|
{ |
|
"epoch": 15.773632580261593, |
|
"grad_norm": 6.900448799133301, |
|
"learning_rate": 1.0565918549346017e-05, |
|
"loss": 2.1245, |
|
"step": 424500 |
|
}, |
|
{ |
|
"epoch": 15.792211652794293, |
|
"grad_norm": 7.180041313171387, |
|
"learning_rate": 1.051947086801427e-05, |
|
"loss": 2.1163, |
|
"step": 425000 |
|
}, |
|
{ |
|
"epoch": 15.810790725326992, |
|
"grad_norm": 7.32526159286499, |
|
"learning_rate": 1.047302318668252e-05, |
|
"loss": 2.1344, |
|
"step": 425500 |
|
}, |
|
{ |
|
"epoch": 15.829369797859691, |
|
"grad_norm": 7.500328540802002, |
|
"learning_rate": 1.0426575505350774e-05, |
|
"loss": 2.1127, |
|
"step": 426000 |
|
}, |
|
{ |
|
"epoch": 15.84794887039239, |
|
"grad_norm": 7.36287784576416, |
|
"learning_rate": 1.0380127824019026e-05, |
|
"loss": 2.1104, |
|
"step": 426500 |
|
}, |
|
{ |
|
"epoch": 15.866527942925089, |
|
"grad_norm": 7.004654884338379, |
|
"learning_rate": 1.0333680142687277e-05, |
|
"loss": 2.13, |
|
"step": 427000 |
|
}, |
|
{ |
|
"epoch": 15.885107015457788, |
|
"grad_norm": 6.9634528160095215, |
|
"learning_rate": 1.0287232461355529e-05, |
|
"loss": 2.1196, |
|
"step": 427500 |
|
}, |
|
{ |
|
"epoch": 15.903686087990488, |
|
"grad_norm": 7.970580101013184, |
|
"learning_rate": 1.0240784780023783e-05, |
|
"loss": 2.1144, |
|
"step": 428000 |
|
}, |
|
{ |
|
"epoch": 15.922265160523187, |
|
"grad_norm": 7.777002334594727, |
|
"learning_rate": 1.0194337098692033e-05, |
|
"loss": 2.1226, |
|
"step": 428500 |
|
}, |
|
{ |
|
"epoch": 15.940844233055886, |
|
"grad_norm": 6.956545352935791, |
|
"learning_rate": 1.0147889417360285e-05, |
|
"loss": 2.1353, |
|
"step": 429000 |
|
}, |
|
{ |
|
"epoch": 15.959423305588585, |
|
"grad_norm": 7.85087251663208, |
|
"learning_rate": 1.010144173602854e-05, |
|
"loss": 2.1281, |
|
"step": 429500 |
|
}, |
|
{ |
|
"epoch": 15.978002378121284, |
|
"grad_norm": 8.030372619628906, |
|
"learning_rate": 1.005499405469679e-05, |
|
"loss": 2.1247, |
|
"step": 430000 |
|
}, |
|
{ |
|
"epoch": 15.996581450653984, |
|
"grad_norm": 7.764926433563232, |
|
"learning_rate": 1.0008546373365042e-05, |
|
"loss": 2.1291, |
|
"step": 430500 |
|
}, |
|
{ |
|
"epoch": 16.015160523186683, |
|
"grad_norm": 6.365900039672852, |
|
"learning_rate": 9.962098692033294e-06, |
|
"loss": 2.098, |
|
"step": 431000 |
|
}, |
|
{ |
|
"epoch": 16.033739595719382, |
|
"grad_norm": 7.203670024871826, |
|
"learning_rate": 9.915651010701546e-06, |
|
"loss": 2.1182, |
|
"step": 431500 |
|
}, |
|
{ |
|
"epoch": 16.05231866825208, |
|
"grad_norm": 7.516459941864014, |
|
"learning_rate": 9.869203329369798e-06, |
|
"loss": 2.0847, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 16.07089774078478, |
|
"grad_norm": 6.9018235206604, |
|
"learning_rate": 9.822755648038049e-06, |
|
"loss": 2.0801, |
|
"step": 432500 |
|
}, |
|
{ |
|
"epoch": 16.08947681331748, |
|
"grad_norm": 7.418632507324219, |
|
"learning_rate": 9.776307966706303e-06, |
|
"loss": 2.0981, |
|
"step": 433000 |
|
}, |
|
{ |
|
"epoch": 16.108055885850177, |
|
"grad_norm": 7.646805763244629, |
|
"learning_rate": 9.729860285374555e-06, |
|
"loss": 2.1049, |
|
"step": 433500 |
|
}, |
|
{ |
|
"epoch": 16.126634958382876, |
|
"grad_norm": 6.691248893737793, |
|
"learning_rate": 9.683412604042806e-06, |
|
"loss": 2.1026, |
|
"step": 434000 |
|
}, |
|
{ |
|
"epoch": 16.145214030915575, |
|
"grad_norm": 8.201228141784668, |
|
"learning_rate": 9.63696492271106e-06, |
|
"loss": 2.1197, |
|
"step": 434500 |
|
}, |
|
{ |
|
"epoch": 16.163793103448278, |
|
"grad_norm": 6.836193561553955, |
|
"learning_rate": 9.590517241379312e-06, |
|
"loss": 2.1048, |
|
"step": 435000 |
|
}, |
|
{ |
|
"epoch": 16.182372175980976, |
|
"grad_norm": 6.607935905456543, |
|
"learning_rate": 9.544069560047562e-06, |
|
"loss": 2.0952, |
|
"step": 435500 |
|
}, |
|
{ |
|
"epoch": 16.200951248513675, |
|
"grad_norm": 7.329438209533691, |
|
"learning_rate": 9.497621878715814e-06, |
|
"loss": 2.1096, |
|
"step": 436000 |
|
}, |
|
{ |
|
"epoch": 16.219530321046374, |
|
"grad_norm": 7.701877117156982, |
|
"learning_rate": 9.451174197384068e-06, |
|
"loss": 2.1061, |
|
"step": 436500 |
|
}, |
|
{ |
|
"epoch": 16.238109393579073, |
|
"grad_norm": 6.743167877197266, |
|
"learning_rate": 9.404726516052319e-06, |
|
"loss": 2.1151, |
|
"step": 437000 |
|
}, |
|
{ |
|
"epoch": 16.256688466111772, |
|
"grad_norm": 7.008676528930664, |
|
"learning_rate": 9.35827883472057e-06, |
|
"loss": 2.1113, |
|
"step": 437500 |
|
}, |
|
{ |
|
"epoch": 16.27526753864447, |
|
"grad_norm": 7.036728858947754, |
|
"learning_rate": 9.311831153388825e-06, |
|
"loss": 2.0898, |
|
"step": 438000 |
|
}, |
|
{ |
|
"epoch": 16.29384661117717, |
|
"grad_norm": 7.374510765075684, |
|
"learning_rate": 9.265383472057075e-06, |
|
"loss": 2.0845, |
|
"step": 438500 |
|
}, |
|
{ |
|
"epoch": 16.31242568370987, |
|
"grad_norm": 7.095835208892822, |
|
"learning_rate": 9.218935790725327e-06, |
|
"loss": 2.117, |
|
"step": 439000 |
|
}, |
|
{ |
|
"epoch": 16.331004756242567, |
|
"grad_norm": 7.737233638763428, |
|
"learning_rate": 9.17248810939358e-06, |
|
"loss": 2.0987, |
|
"step": 439500 |
|
}, |
|
{ |
|
"epoch": 16.349583828775266, |
|
"grad_norm": 7.745171546936035, |
|
"learning_rate": 9.126040428061832e-06, |
|
"loss": 2.0858, |
|
"step": 440000 |
|
}, |
|
{ |
|
"epoch": 16.368162901307965, |
|
"grad_norm": 6.25264835357666, |
|
"learning_rate": 9.079592746730084e-06, |
|
"loss": 2.1014, |
|
"step": 440500 |
|
}, |
|
{ |
|
"epoch": 16.386741973840667, |
|
"grad_norm": 8.324295043945312, |
|
"learning_rate": 9.033145065398334e-06, |
|
"loss": 2.1006, |
|
"step": 441000 |
|
}, |
|
{ |
|
"epoch": 16.405321046373366, |
|
"grad_norm": 7.7967352867126465, |
|
"learning_rate": 8.986697384066588e-06, |
|
"loss": 2.0758, |
|
"step": 441500 |
|
}, |
|
{ |
|
"epoch": 16.423900118906065, |
|
"grad_norm": 7.272579193115234, |
|
"learning_rate": 8.94024970273484e-06, |
|
"loss": 2.1062, |
|
"step": 442000 |
|
}, |
|
{ |
|
"epoch": 16.442479191438764, |
|
"grad_norm": 7.0281195640563965, |
|
"learning_rate": 8.893802021403091e-06, |
|
"loss": 2.0979, |
|
"step": 442500 |
|
}, |
|
{ |
|
"epoch": 16.461058263971463, |
|
"grad_norm": 7.969797611236572, |
|
"learning_rate": 8.847354340071345e-06, |
|
"loss": 2.0782, |
|
"step": 443000 |
|
}, |
|
{ |
|
"epoch": 16.47963733650416, |
|
"grad_norm": 7.431224822998047, |
|
"learning_rate": 8.800906658739597e-06, |
|
"loss": 2.0986, |
|
"step": 443500 |
|
}, |
|
{ |
|
"epoch": 16.49821640903686, |
|
"grad_norm": 7.004672050476074, |
|
"learning_rate": 8.754458977407848e-06, |
|
"loss": 2.1162, |
|
"step": 444000 |
|
}, |
|
{ |
|
"epoch": 16.51679548156956, |
|
"grad_norm": 7.328388214111328, |
|
"learning_rate": 8.7080112960761e-06, |
|
"loss": 2.0903, |
|
"step": 444500 |
|
}, |
|
{ |
|
"epoch": 16.53537455410226, |
|
"grad_norm": 7.997599124908447, |
|
"learning_rate": 8.661563614744354e-06, |
|
"loss": 2.0926, |
|
"step": 445000 |
|
}, |
|
{ |
|
"epoch": 16.553953626634957, |
|
"grad_norm": 6.598504066467285, |
|
"learning_rate": 8.615115933412604e-06, |
|
"loss": 2.103, |
|
"step": 445500 |
|
}, |
|
{ |
|
"epoch": 16.572532699167656, |
|
"grad_norm": 8.041633605957031, |
|
"learning_rate": 8.568668252080856e-06, |
|
"loss": 2.0902, |
|
"step": 446000 |
|
}, |
|
{ |
|
"epoch": 16.591111771700355, |
|
"grad_norm": 6.456114768981934, |
|
"learning_rate": 8.522220570749109e-06, |
|
"loss": 2.1113, |
|
"step": 446500 |
|
}, |
|
{ |
|
"epoch": 16.609690844233057, |
|
"grad_norm": 8.524587631225586, |
|
"learning_rate": 8.47577288941736e-06, |
|
"loss": 2.1034, |
|
"step": 447000 |
|
}, |
|
{ |
|
"epoch": 16.628269916765756, |
|
"grad_norm": 7.4559102058410645, |
|
"learning_rate": 8.429325208085613e-06, |
|
"loss": 2.0911, |
|
"step": 447500 |
|
}, |
|
{ |
|
"epoch": 16.646848989298455, |
|
"grad_norm": 7.678273677825928, |
|
"learning_rate": 8.382877526753865e-06, |
|
"loss": 2.1009, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 16.665428061831154, |
|
"grad_norm": 6.468957424163818, |
|
"learning_rate": 8.336429845422117e-06, |
|
"loss": 2.104, |
|
"step": 448500 |
|
}, |
|
{ |
|
"epoch": 16.684007134363853, |
|
"grad_norm": 7.746886730194092, |
|
"learning_rate": 8.28998216409037e-06, |
|
"loss": 2.0961, |
|
"step": 449000 |
|
}, |
|
{ |
|
"epoch": 16.70258620689655, |
|
"grad_norm": 6.837747097015381, |
|
"learning_rate": 8.24353448275862e-06, |
|
"loss": 2.0867, |
|
"step": 449500 |
|
}, |
|
{ |
|
"epoch": 16.72116527942925, |
|
"grad_norm": 7.098623275756836, |
|
"learning_rate": 8.197086801426874e-06, |
|
"loss": 2.1093, |
|
"step": 450000 |
|
}, |
|
{ |
|
"epoch": 16.73974435196195, |
|
"grad_norm": 6.478063106536865, |
|
"learning_rate": 8.150639120095126e-06, |
|
"loss": 2.0933, |
|
"step": 450500 |
|
}, |
|
{ |
|
"epoch": 16.758323424494648, |
|
"grad_norm": 7.2032012939453125, |
|
"learning_rate": 8.104191438763376e-06, |
|
"loss": 2.0801, |
|
"step": 451000 |
|
}, |
|
{ |
|
"epoch": 16.776902497027347, |
|
"grad_norm": 6.382145881652832, |
|
"learning_rate": 8.057743757431629e-06, |
|
"loss": 2.1063, |
|
"step": 451500 |
|
}, |
|
{ |
|
"epoch": 16.795481569560046, |
|
"grad_norm": 7.381346702575684, |
|
"learning_rate": 8.011296076099883e-06, |
|
"loss": 2.0992, |
|
"step": 452000 |
|
}, |
|
{ |
|
"epoch": 16.81406064209275, |
|
"grad_norm": 6.544224739074707, |
|
"learning_rate": 7.964848394768133e-06, |
|
"loss": 2.0993, |
|
"step": 452500 |
|
}, |
|
{ |
|
"epoch": 16.832639714625447, |
|
"grad_norm": 7.141576290130615, |
|
"learning_rate": 7.918400713436385e-06, |
|
"loss": 2.0919, |
|
"step": 453000 |
|
}, |
|
{ |
|
"epoch": 16.851218787158146, |
|
"grad_norm": 6.41404914855957, |
|
"learning_rate": 7.871953032104639e-06, |
|
"loss": 2.0961, |
|
"step": 453500 |
|
}, |
|
{ |
|
"epoch": 16.869797859690845, |
|
"grad_norm": 7.792717933654785, |
|
"learning_rate": 7.82550535077289e-06, |
|
"loss": 2.0875, |
|
"step": 454000 |
|
}, |
|
{ |
|
"epoch": 16.888376932223544, |
|
"grad_norm": 8.0609130859375, |
|
"learning_rate": 7.779057669441142e-06, |
|
"loss": 2.0905, |
|
"step": 454500 |
|
}, |
|
{ |
|
"epoch": 16.906956004756243, |
|
"grad_norm": 7.00869083404541, |
|
"learning_rate": 7.732609988109394e-06, |
|
"loss": 2.0877, |
|
"step": 455000 |
|
}, |
|
{ |
|
"epoch": 16.92553507728894, |
|
"grad_norm": 7.780180931091309, |
|
"learning_rate": 7.686162306777646e-06, |
|
"loss": 2.0956, |
|
"step": 455500 |
|
}, |
|
{ |
|
"epoch": 16.94411414982164, |
|
"grad_norm": 7.056099891662598, |
|
"learning_rate": 7.639714625445898e-06, |
|
"loss": 2.0762, |
|
"step": 456000 |
|
}, |
|
{ |
|
"epoch": 16.96269322235434, |
|
"grad_norm": 6.861847877502441, |
|
"learning_rate": 7.59326694411415e-06, |
|
"loss": 2.0946, |
|
"step": 456500 |
|
}, |
|
{ |
|
"epoch": 16.981272294887038, |
|
"grad_norm": 7.449362754821777, |
|
"learning_rate": 7.546819262782402e-06, |
|
"loss": 2.1104, |
|
"step": 457000 |
|
}, |
|
{ |
|
"epoch": 16.999851367419737, |
|
"grad_norm": 7.2395782470703125, |
|
"learning_rate": 7.500371581450655e-06, |
|
"loss": 2.1044, |
|
"step": 457500 |
|
}, |
|
{ |
|
"epoch": 17.01843043995244, |
|
"grad_norm": 7.514138221740723, |
|
"learning_rate": 7.453923900118906e-06, |
|
"loss": 2.0918, |
|
"step": 458000 |
|
}, |
|
{ |
|
"epoch": 17.037009512485138, |
|
"grad_norm": 6.817535877227783, |
|
"learning_rate": 7.4074762187871585e-06, |
|
"loss": 2.1078, |
|
"step": 458500 |
|
}, |
|
{ |
|
"epoch": 17.055588585017837, |
|
"grad_norm": 7.827926158905029, |
|
"learning_rate": 7.3610285374554115e-06, |
|
"loss": 2.0747, |
|
"step": 459000 |
|
}, |
|
{ |
|
"epoch": 17.074167657550536, |
|
"grad_norm": 9.247724533081055, |
|
"learning_rate": 7.314580856123662e-06, |
|
"loss": 2.0964, |
|
"step": 459500 |
|
}, |
|
{ |
|
"epoch": 17.092746730083235, |
|
"grad_norm": 8.57845687866211, |
|
"learning_rate": 7.268133174791915e-06, |
|
"loss": 2.0876, |
|
"step": 460000 |
|
}, |
|
{ |
|
"epoch": 17.111325802615934, |
|
"grad_norm": 7.123178482055664, |
|
"learning_rate": 7.221685493460167e-06, |
|
"loss": 2.0672, |
|
"step": 460500 |
|
}, |
|
{ |
|
"epoch": 17.129904875148632, |
|
"grad_norm": 7.820250034332275, |
|
"learning_rate": 7.1752378121284185e-06, |
|
"loss": 2.0708, |
|
"step": 461000 |
|
}, |
|
{ |
|
"epoch": 17.14848394768133, |
|
"grad_norm": 7.021051406860352, |
|
"learning_rate": 7.1287901307966716e-06, |
|
"loss": 2.0801, |
|
"step": 461500 |
|
}, |
|
{ |
|
"epoch": 17.16706302021403, |
|
"grad_norm": 8.586702346801758, |
|
"learning_rate": 7.082342449464922e-06, |
|
"loss": 2.0749, |
|
"step": 462000 |
|
}, |
|
{ |
|
"epoch": 17.18564209274673, |
|
"grad_norm": 6.818421363830566, |
|
"learning_rate": 7.035894768133175e-06, |
|
"loss": 2.0916, |
|
"step": 462500 |
|
}, |
|
{ |
|
"epoch": 17.204221165279428, |
|
"grad_norm": 7.275014877319336, |
|
"learning_rate": 6.989447086801427e-06, |
|
"loss": 2.0739, |
|
"step": 463000 |
|
}, |
|
{ |
|
"epoch": 17.222800237812127, |
|
"grad_norm": 6.750241756439209, |
|
"learning_rate": 6.942999405469679e-06, |
|
"loss": 2.0852, |
|
"step": 463500 |
|
}, |
|
{ |
|
"epoch": 17.24137931034483, |
|
"grad_norm": 7.445390701293945, |
|
"learning_rate": 6.896551724137932e-06, |
|
"loss": 2.0748, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 17.259958382877528, |
|
"grad_norm": 8.087651252746582, |
|
"learning_rate": 6.850104042806184e-06, |
|
"loss": 2.0588, |
|
"step": 464500 |
|
}, |
|
{ |
|
"epoch": 17.278537455410227, |
|
"grad_norm": 7.12742805480957, |
|
"learning_rate": 6.803656361474435e-06, |
|
"loss": 2.074, |
|
"step": 465000 |
|
}, |
|
{ |
|
"epoch": 17.297116527942926, |
|
"grad_norm": 7.231345176696777, |
|
"learning_rate": 6.757208680142687e-06, |
|
"loss": 2.0682, |
|
"step": 465500 |
|
}, |
|
{ |
|
"epoch": 17.315695600475625, |
|
"grad_norm": 7.275602340698242, |
|
"learning_rate": 6.71076099881094e-06, |
|
"loss": 2.0813, |
|
"step": 466000 |
|
}, |
|
{ |
|
"epoch": 17.334274673008323, |
|
"grad_norm": 7.546669006347656, |
|
"learning_rate": 6.664313317479192e-06, |
|
"loss": 2.0839, |
|
"step": 466500 |
|
}, |
|
{ |
|
"epoch": 17.352853745541022, |
|
"grad_norm": 7.166531085968018, |
|
"learning_rate": 6.617865636147444e-06, |
|
"loss": 2.064, |
|
"step": 467000 |
|
}, |
|
{ |
|
"epoch": 17.37143281807372, |
|
"grad_norm": 8.803594589233398, |
|
"learning_rate": 6.571417954815696e-06, |
|
"loss": 2.0847, |
|
"step": 467500 |
|
}, |
|
{ |
|
"epoch": 17.39001189060642, |
|
"grad_norm": 7.301925182342529, |
|
"learning_rate": 6.5249702734839475e-06, |
|
"loss": 2.0938, |
|
"step": 468000 |
|
}, |
|
{ |
|
"epoch": 17.40859096313912, |
|
"grad_norm": 7.235419273376465, |
|
"learning_rate": 6.4785225921522005e-06, |
|
"loss": 2.0824, |
|
"step": 468500 |
|
}, |
|
{ |
|
"epoch": 17.427170035671818, |
|
"grad_norm": 9.021172523498535, |
|
"learning_rate": 6.432074910820453e-06, |
|
"loss": 2.0924, |
|
"step": 469000 |
|
}, |
|
{ |
|
"epoch": 17.445749108204517, |
|
"grad_norm": 7.037625789642334, |
|
"learning_rate": 6.385627229488704e-06, |
|
"loss": 2.0872, |
|
"step": 469500 |
|
}, |
|
{ |
|
"epoch": 17.46432818073722, |
|
"grad_norm": 8.20162296295166, |
|
"learning_rate": 6.339179548156956e-06, |
|
"loss": 2.0755, |
|
"step": 470000 |
|
}, |
|
{ |
|
"epoch": 17.482907253269918, |
|
"grad_norm": 7.615068435668945, |
|
"learning_rate": 6.2927318668252075e-06, |
|
"loss": 2.0826, |
|
"step": 470500 |
|
}, |
|
{ |
|
"epoch": 17.501486325802617, |
|
"grad_norm": 7.641859531402588, |
|
"learning_rate": 6.2462841854934606e-06, |
|
"loss": 2.087, |
|
"step": 471000 |
|
}, |
|
{ |
|
"epoch": 17.520065398335316, |
|
"grad_norm": 6.667306900024414, |
|
"learning_rate": 6.199836504161712e-06, |
|
"loss": 2.0842, |
|
"step": 471500 |
|
}, |
|
{ |
|
"epoch": 17.538644470868014, |
|
"grad_norm": 6.990174770355225, |
|
"learning_rate": 6.153388822829965e-06, |
|
"loss": 2.0861, |
|
"step": 472000 |
|
}, |
|
{ |
|
"epoch": 17.557223543400713, |
|
"grad_norm": 7.540374755859375, |
|
"learning_rate": 6.106941141498216e-06, |
|
"loss": 2.078, |
|
"step": 472500 |
|
}, |
|
{ |
|
"epoch": 17.575802615933412, |
|
"grad_norm": 6.960676670074463, |
|
"learning_rate": 6.0604934601664685e-06, |
|
"loss": 2.0877, |
|
"step": 473000 |
|
}, |
|
{ |
|
"epoch": 17.59438168846611, |
|
"grad_norm": 8.197839736938477, |
|
"learning_rate": 6.0140457788347215e-06, |
|
"loss": 2.0718, |
|
"step": 473500 |
|
}, |
|
{ |
|
"epoch": 17.61296076099881, |
|
"grad_norm": 7.723132610321045, |
|
"learning_rate": 5.967598097502973e-06, |
|
"loss": 2.0793, |
|
"step": 474000 |
|
}, |
|
{ |
|
"epoch": 17.63153983353151, |
|
"grad_norm": 6.541485786437988, |
|
"learning_rate": 5.921150416171225e-06, |
|
"loss": 2.0754, |
|
"step": 474500 |
|
}, |
|
{ |
|
"epoch": 17.650118906064208, |
|
"grad_norm": 7.376631736755371, |
|
"learning_rate": 5.874702734839476e-06, |
|
"loss": 2.0792, |
|
"step": 475000 |
|
}, |
|
{ |
|
"epoch": 17.66869797859691, |
|
"grad_norm": 6.127633094787598, |
|
"learning_rate": 5.828255053507729e-06, |
|
"loss": 2.0492, |
|
"step": 475500 |
|
}, |
|
{ |
|
"epoch": 17.68727705112961, |
|
"grad_norm": 7.734124183654785, |
|
"learning_rate": 5.781807372175982e-06, |
|
"loss": 2.0748, |
|
"step": 476000 |
|
}, |
|
{ |
|
"epoch": 17.705856123662308, |
|
"grad_norm": 6.9572601318359375, |
|
"learning_rate": 5.735359690844233e-06, |
|
"loss": 2.0992, |
|
"step": 476500 |
|
}, |
|
{ |
|
"epoch": 17.724435196195007, |
|
"grad_norm": 6.885385513305664, |
|
"learning_rate": 5.688912009512486e-06, |
|
"loss": 2.0771, |
|
"step": 477000 |
|
}, |
|
{ |
|
"epoch": 17.743014268727705, |
|
"grad_norm": 7.826180458068848, |
|
"learning_rate": 5.642464328180737e-06, |
|
"loss": 2.081, |
|
"step": 477500 |
|
}, |
|
{ |
|
"epoch": 17.761593341260404, |
|
"grad_norm": 7.1644439697265625, |
|
"learning_rate": 5.5960166468489895e-06, |
|
"loss": 2.0847, |
|
"step": 478000 |
|
}, |
|
{ |
|
"epoch": 17.780172413793103, |
|
"grad_norm": 8.081832885742188, |
|
"learning_rate": 5.549568965517242e-06, |
|
"loss": 2.072, |
|
"step": 478500 |
|
}, |
|
{ |
|
"epoch": 17.798751486325802, |
|
"grad_norm": 6.1492600440979, |
|
"learning_rate": 5.503121284185494e-06, |
|
"loss": 2.0919, |
|
"step": 479000 |
|
}, |
|
{ |
|
"epoch": 17.8173305588585, |
|
"grad_norm": 6.837408542633057, |
|
"learning_rate": 5.456673602853746e-06, |
|
"loss": 2.0745, |
|
"step": 479500 |
|
}, |
|
{ |
|
"epoch": 17.8359096313912, |
|
"grad_norm": 6.619295120239258, |
|
"learning_rate": 5.410225921521997e-06, |
|
"loss": 2.0758, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 17.8544887039239, |
|
"grad_norm": 7.465627193450928, |
|
"learning_rate": 5.3637782401902504e-06, |
|
"loss": 2.0705, |
|
"step": 480500 |
|
}, |
|
{ |
|
"epoch": 17.8730677764566, |
|
"grad_norm": 7.469555854797363, |
|
"learning_rate": 5.317330558858502e-06, |
|
"loss": 2.0675, |
|
"step": 481000 |
|
}, |
|
{ |
|
"epoch": 17.8916468489893, |
|
"grad_norm": 7.39703893661499, |
|
"learning_rate": 5.270882877526754e-06, |
|
"loss": 2.0869, |
|
"step": 481500 |
|
}, |
|
{ |
|
"epoch": 17.910225921522, |
|
"grad_norm": 6.684396743774414, |
|
"learning_rate": 5.224435196195006e-06, |
|
"loss": 2.0782, |
|
"step": 482000 |
|
}, |
|
{ |
|
"epoch": 17.928804994054698, |
|
"grad_norm": 8.273653984069824, |
|
"learning_rate": 5.177987514863258e-06, |
|
"loss": 2.0792, |
|
"step": 482500 |
|
}, |
|
{ |
|
"epoch": 17.947384066587396, |
|
"grad_norm": 7.827981472015381, |
|
"learning_rate": 5.1315398335315105e-06, |
|
"loss": 2.0862, |
|
"step": 483000 |
|
}, |
|
{ |
|
"epoch": 17.965963139120095, |
|
"grad_norm": 7.737405300140381, |
|
"learning_rate": 5.085092152199762e-06, |
|
"loss": 2.0632, |
|
"step": 483500 |
|
}, |
|
{ |
|
"epoch": 17.984542211652794, |
|
"grad_norm": 7.617379665374756, |
|
"learning_rate": 5.038644470868015e-06, |
|
"loss": 2.1037, |
|
"step": 484000 |
|
}, |
|
{ |
|
"epoch": 18.003121284185493, |
|
"grad_norm": 7.147322177886963, |
|
"learning_rate": 4.992196789536266e-06, |
|
"loss": 2.0701, |
|
"step": 484500 |
|
}, |
|
{ |
|
"epoch": 18.021700356718192, |
|
"grad_norm": 6.316223621368408, |
|
"learning_rate": 4.945749108204518e-06, |
|
"loss": 2.0536, |
|
"step": 485000 |
|
}, |
|
{ |
|
"epoch": 18.04027942925089, |
|
"grad_norm": 7.639254093170166, |
|
"learning_rate": 4.8993014268727714e-06, |
|
"loss": 2.0594, |
|
"step": 485500 |
|
}, |
|
{ |
|
"epoch": 18.05885850178359, |
|
"grad_norm": 7.149983882904053, |
|
"learning_rate": 4.852853745541023e-06, |
|
"loss": 2.0546, |
|
"step": 486000 |
|
}, |
|
{ |
|
"epoch": 18.07743757431629, |
|
"grad_norm": 7.123045921325684, |
|
"learning_rate": 4.806406064209275e-06, |
|
"loss": 2.0819, |
|
"step": 486500 |
|
}, |
|
{ |
|
"epoch": 18.09601664684899, |
|
"grad_norm": 7.2495293617248535, |
|
"learning_rate": 4.759958382877526e-06, |
|
"loss": 2.0641, |
|
"step": 487000 |
|
}, |
|
{ |
|
"epoch": 18.11459571938169, |
|
"grad_norm": 7.309257507324219, |
|
"learning_rate": 4.713510701545779e-06, |
|
"loss": 2.0664, |
|
"step": 487500 |
|
}, |
|
{ |
|
"epoch": 18.13317479191439, |
|
"grad_norm": 6.188238620758057, |
|
"learning_rate": 4.6670630202140315e-06, |
|
"loss": 2.0801, |
|
"step": 488000 |
|
}, |
|
{ |
|
"epoch": 18.151753864447087, |
|
"grad_norm": 7.894054889678955, |
|
"learning_rate": 4.620615338882283e-06, |
|
"loss": 2.0576, |
|
"step": 488500 |
|
}, |
|
{ |
|
"epoch": 18.170332936979786, |
|
"grad_norm": 7.271005153656006, |
|
"learning_rate": 4.574167657550536e-06, |
|
"loss": 2.0477, |
|
"step": 489000 |
|
}, |
|
{ |
|
"epoch": 18.188912009512485, |
|
"grad_norm": 7.505859851837158, |
|
"learning_rate": 4.527719976218787e-06, |
|
"loss": 2.0726, |
|
"step": 489500 |
|
}, |
|
{ |
|
"epoch": 18.207491082045184, |
|
"grad_norm": 7.29171085357666, |
|
"learning_rate": 4.4812722948870394e-06, |
|
"loss": 2.0641, |
|
"step": 490000 |
|
}, |
|
{ |
|
"epoch": 18.226070154577883, |
|
"grad_norm": 7.959132671356201, |
|
"learning_rate": 4.434824613555292e-06, |
|
"loss": 2.0704, |
|
"step": 490500 |
|
}, |
|
{ |
|
"epoch": 18.244649227110582, |
|
"grad_norm": 6.843657493591309, |
|
"learning_rate": 4.388376932223544e-06, |
|
"loss": 2.0695, |
|
"step": 491000 |
|
}, |
|
{ |
|
"epoch": 18.26322829964328, |
|
"grad_norm": 6.887396812438965, |
|
"learning_rate": 4.341929250891796e-06, |
|
"loss": 2.0622, |
|
"step": 491500 |
|
}, |
|
{ |
|
"epoch": 18.28180737217598, |
|
"grad_norm": 7.143764019012451, |
|
"learning_rate": 4.295481569560047e-06, |
|
"loss": 2.0729, |
|
"step": 492000 |
|
}, |
|
{ |
|
"epoch": 18.30038644470868, |
|
"grad_norm": 7.79412841796875, |
|
"learning_rate": 4.2490338882283e-06, |
|
"loss": 2.054, |
|
"step": 492500 |
|
}, |
|
{ |
|
"epoch": 18.31896551724138, |
|
"grad_norm": 7.328498363494873, |
|
"learning_rate": 4.202586206896552e-06, |
|
"loss": 2.0582, |
|
"step": 493000 |
|
}, |
|
{ |
|
"epoch": 18.33754458977408, |
|
"grad_norm": 6.897115230560303, |
|
"learning_rate": 4.156138525564804e-06, |
|
"loss": 2.0548, |
|
"step": 493500 |
|
}, |
|
{ |
|
"epoch": 18.35612366230678, |
|
"grad_norm": 7.248096942901611, |
|
"learning_rate": 4.109690844233056e-06, |
|
"loss": 2.0668, |
|
"step": 494000 |
|
}, |
|
{ |
|
"epoch": 18.374702734839477, |
|
"grad_norm": 8.02023983001709, |
|
"learning_rate": 4.063243162901308e-06, |
|
"loss": 2.0527, |
|
"step": 494500 |
|
}, |
|
{ |
|
"epoch": 18.393281807372176, |
|
"grad_norm": 7.436459541320801, |
|
"learning_rate": 4.0167954815695605e-06, |
|
"loss": 2.0685, |
|
"step": 495000 |
|
}, |
|
{ |
|
"epoch": 18.411860879904875, |
|
"grad_norm": 7.74916410446167, |
|
"learning_rate": 3.970347800237812e-06, |
|
"loss": 2.0709, |
|
"step": 495500 |
|
}, |
|
{ |
|
"epoch": 18.430439952437574, |
|
"grad_norm": 8.027193069458008, |
|
"learning_rate": 3.923900118906065e-06, |
|
"loss": 2.0614, |
|
"step": 496000 |
|
}, |
|
{ |
|
"epoch": 18.449019024970273, |
|
"grad_norm": 6.885724067687988, |
|
"learning_rate": 3.877452437574316e-06, |
|
"loss": 2.0519, |
|
"step": 496500 |
|
}, |
|
{ |
|
"epoch": 18.46759809750297, |
|
"grad_norm": 7.010785102844238, |
|
"learning_rate": 3.831004756242568e-06, |
|
"loss": 2.068, |
|
"step": 497000 |
|
}, |
|
{ |
|
"epoch": 18.48617717003567, |
|
"grad_norm": 6.670163631439209, |
|
"learning_rate": 3.78455707491082e-06, |
|
"loss": 2.0695, |
|
"step": 497500 |
|
}, |
|
{ |
|
"epoch": 18.50475624256837, |
|
"grad_norm": 7.944169998168945, |
|
"learning_rate": 3.7381093935790727e-06, |
|
"loss": 2.0723, |
|
"step": 498000 |
|
}, |
|
{ |
|
"epoch": 18.523335315101072, |
|
"grad_norm": 5.955043792724609, |
|
"learning_rate": 3.691661712247325e-06, |
|
"loss": 2.0456, |
|
"step": 498500 |
|
}, |
|
{ |
|
"epoch": 18.54191438763377, |
|
"grad_norm": 7.109121322631836, |
|
"learning_rate": 3.6452140309155767e-06, |
|
"loss": 2.0612, |
|
"step": 499000 |
|
}, |
|
{ |
|
"epoch": 18.56049346016647, |
|
"grad_norm": 6.318941593170166, |
|
"learning_rate": 3.5987663495838293e-06, |
|
"loss": 2.0702, |
|
"step": 499500 |
|
}, |
|
{ |
|
"epoch": 18.57907253269917, |
|
"grad_norm": 8.715611457824707, |
|
"learning_rate": 3.552318668252081e-06, |
|
"loss": 2.08, |
|
"step": 500000 |
|
}, |
|
{ |
|
"epoch": 18.597651605231867, |
|
"grad_norm": 9.135302543640137, |
|
"learning_rate": 3.505870986920333e-06, |
|
"loss": 2.0762, |
|
"step": 500500 |
|
}, |
|
{ |
|
"epoch": 18.616230677764566, |
|
"grad_norm": 7.45161247253418, |
|
"learning_rate": 3.4594233055885854e-06, |
|
"loss": 2.0533, |
|
"step": 501000 |
|
}, |
|
{ |
|
"epoch": 18.634809750297265, |
|
"grad_norm": 8.179544448852539, |
|
"learning_rate": 3.412975624256837e-06, |
|
"loss": 2.0548, |
|
"step": 501500 |
|
}, |
|
{ |
|
"epoch": 18.653388822829964, |
|
"grad_norm": 9.695868492126465, |
|
"learning_rate": 3.3665279429250894e-06, |
|
"loss": 2.0502, |
|
"step": 502000 |
|
}, |
|
{ |
|
"epoch": 18.671967895362663, |
|
"grad_norm": 9.127315521240234, |
|
"learning_rate": 3.320080261593341e-06, |
|
"loss": 2.0559, |
|
"step": 502500 |
|
}, |
|
{ |
|
"epoch": 18.69054696789536, |
|
"grad_norm": 7.58563232421875, |
|
"learning_rate": 3.2736325802615937e-06, |
|
"loss": 2.0618, |
|
"step": 503000 |
|
}, |
|
{ |
|
"epoch": 18.70912604042806, |
|
"grad_norm": 6.781043529510498, |
|
"learning_rate": 3.2271848989298455e-06, |
|
"loss": 2.0654, |
|
"step": 503500 |
|
}, |
|
{ |
|
"epoch": 18.727705112960763, |
|
"grad_norm": 7.929651737213135, |
|
"learning_rate": 3.1807372175980973e-06, |
|
"loss": 2.0493, |
|
"step": 504000 |
|
}, |
|
{ |
|
"epoch": 18.74628418549346, |
|
"grad_norm": 7.395569324493408, |
|
"learning_rate": 3.13428953626635e-06, |
|
"loss": 2.042, |
|
"step": 504500 |
|
}, |
|
{ |
|
"epoch": 18.76486325802616, |
|
"grad_norm": 8.050883293151855, |
|
"learning_rate": 3.087841854934602e-06, |
|
"loss": 2.0625, |
|
"step": 505000 |
|
}, |
|
{ |
|
"epoch": 18.78344233055886, |
|
"grad_norm": 8.531946182250977, |
|
"learning_rate": 3.041394173602854e-06, |
|
"loss": 2.0636, |
|
"step": 505500 |
|
}, |
|
{ |
|
"epoch": 18.80202140309156, |
|
"grad_norm": 7.74788236618042, |
|
"learning_rate": 2.994946492271106e-06, |
|
"loss": 2.0758, |
|
"step": 506000 |
|
}, |
|
{ |
|
"epoch": 18.820600475624257, |
|
"grad_norm": 7.532721996307373, |
|
"learning_rate": 2.9484988109393578e-06, |
|
"loss": 2.0559, |
|
"step": 506500 |
|
}, |
|
{ |
|
"epoch": 18.839179548156956, |
|
"grad_norm": 6.848814487457275, |
|
"learning_rate": 2.90205112960761e-06, |
|
"loss": 2.0552, |
|
"step": 507000 |
|
}, |
|
{ |
|
"epoch": 18.857758620689655, |
|
"grad_norm": 7.606546401977539, |
|
"learning_rate": 2.855603448275862e-06, |
|
"loss": 2.0528, |
|
"step": 507500 |
|
}, |
|
{ |
|
"epoch": 18.876337693222354, |
|
"grad_norm": 7.560408592224121, |
|
"learning_rate": 2.8091557669441143e-06, |
|
"loss": 2.0529, |
|
"step": 508000 |
|
}, |
|
{ |
|
"epoch": 18.894916765755053, |
|
"grad_norm": 8.788424491882324, |
|
"learning_rate": 2.7627080856123665e-06, |
|
"loss": 2.0564, |
|
"step": 508500 |
|
}, |
|
{ |
|
"epoch": 18.91349583828775, |
|
"grad_norm": 6.34813928604126, |
|
"learning_rate": 2.7162604042806183e-06, |
|
"loss": 2.0517, |
|
"step": 509000 |
|
}, |
|
{ |
|
"epoch": 18.93207491082045, |
|
"grad_norm": 7.5938005447387695, |
|
"learning_rate": 2.6698127229488705e-06, |
|
"loss": 2.0615, |
|
"step": 509500 |
|
}, |
|
{ |
|
"epoch": 18.950653983353153, |
|
"grad_norm": 7.773651123046875, |
|
"learning_rate": 2.6233650416171222e-06, |
|
"loss": 2.0572, |
|
"step": 510000 |
|
}, |
|
{ |
|
"epoch": 18.96923305588585, |
|
"grad_norm": 6.474369049072266, |
|
"learning_rate": 2.576917360285375e-06, |
|
"loss": 2.0563, |
|
"step": 510500 |
|
}, |
|
{ |
|
"epoch": 18.98781212841855, |
|
"grad_norm": 7.805785179138184, |
|
"learning_rate": 2.530469678953627e-06, |
|
"loss": 2.0697, |
|
"step": 511000 |
|
}, |
|
{ |
|
"epoch": 19.00639120095125, |
|
"grad_norm": 6.911838054656982, |
|
"learning_rate": 2.484021997621879e-06, |
|
"loss": 2.066, |
|
"step": 511500 |
|
}, |
|
{ |
|
"epoch": 19.024970273483948, |
|
"grad_norm": 7.869637966156006, |
|
"learning_rate": 2.437574316290131e-06, |
|
"loss": 2.0383, |
|
"step": 512000 |
|
}, |
|
{ |
|
"epoch": 19.043549346016647, |
|
"grad_norm": 8.383206367492676, |
|
"learning_rate": 2.3911266349583828e-06, |
|
"loss": 2.0579, |
|
"step": 512500 |
|
}, |
|
{ |
|
"epoch": 19.062128418549346, |
|
"grad_norm": 8.408047676086426, |
|
"learning_rate": 2.344678953626635e-06, |
|
"loss": 2.0651, |
|
"step": 513000 |
|
}, |
|
{ |
|
"epoch": 19.080707491082045, |
|
"grad_norm": 7.509279251098633, |
|
"learning_rate": 2.298231272294887e-06, |
|
"loss": 2.0762, |
|
"step": 513500 |
|
}, |
|
{ |
|
"epoch": 19.099286563614744, |
|
"grad_norm": 7.116653919219971, |
|
"learning_rate": 2.2517835909631393e-06, |
|
"loss": 2.05, |
|
"step": 514000 |
|
}, |
|
{ |
|
"epoch": 19.117865636147442, |
|
"grad_norm": 6.725174427032471, |
|
"learning_rate": 2.2053359096313915e-06, |
|
"loss": 2.0654, |
|
"step": 514500 |
|
}, |
|
{ |
|
"epoch": 19.13644470868014, |
|
"grad_norm": 6.7676544189453125, |
|
"learning_rate": 2.1588882282996433e-06, |
|
"loss": 2.0608, |
|
"step": 515000 |
|
}, |
|
{ |
|
"epoch": 19.15502378121284, |
|
"grad_norm": 7.207517147064209, |
|
"learning_rate": 2.1124405469678954e-06, |
|
"loss": 2.06, |
|
"step": 515500 |
|
}, |
|
{ |
|
"epoch": 19.173602853745543, |
|
"grad_norm": 7.989516735076904, |
|
"learning_rate": 2.065992865636147e-06, |
|
"loss": 2.034, |
|
"step": 516000 |
|
}, |
|
{ |
|
"epoch": 19.19218192627824, |
|
"grad_norm": 7.003942489624023, |
|
"learning_rate": 2.0195451843044e-06, |
|
"loss": 2.0518, |
|
"step": 516500 |
|
}, |
|
{ |
|
"epoch": 19.21076099881094, |
|
"grad_norm": 6.7362236976623535, |
|
"learning_rate": 1.973097502972652e-06, |
|
"loss": 2.0609, |
|
"step": 517000 |
|
}, |
|
{ |
|
"epoch": 19.22934007134364, |
|
"grad_norm": 6.881633758544922, |
|
"learning_rate": 1.9266498216409038e-06, |
|
"loss": 2.0541, |
|
"step": 517500 |
|
}, |
|
{ |
|
"epoch": 19.247919143876338, |
|
"grad_norm": 7.07053279876709, |
|
"learning_rate": 1.880202140309156e-06, |
|
"loss": 2.037, |
|
"step": 518000 |
|
}, |
|
{ |
|
"epoch": 19.266498216409037, |
|
"grad_norm": 7.328449249267578, |
|
"learning_rate": 1.833754458977408e-06, |
|
"loss": 2.0379, |
|
"step": 518500 |
|
}, |
|
{ |
|
"epoch": 19.285077288941736, |
|
"grad_norm": 7.447302341461182, |
|
"learning_rate": 1.7873067776456601e-06, |
|
"loss": 2.0461, |
|
"step": 519000 |
|
}, |
|
{ |
|
"epoch": 19.303656361474435, |
|
"grad_norm": 6.588597774505615, |
|
"learning_rate": 1.7408590963139119e-06, |
|
"loss": 2.0395, |
|
"step": 519500 |
|
}, |
|
{ |
|
"epoch": 19.322235434007133, |
|
"grad_norm": 6.768307685852051, |
|
"learning_rate": 1.694411414982164e-06, |
|
"loss": 2.0747, |
|
"step": 520000 |
|
}, |
|
{ |
|
"epoch": 19.340814506539832, |
|
"grad_norm": 7.688553810119629, |
|
"learning_rate": 1.6479637336504165e-06, |
|
"loss": 2.0446, |
|
"step": 520500 |
|
}, |
|
{ |
|
"epoch": 19.35939357907253, |
|
"grad_norm": 8.146514892578125, |
|
"learning_rate": 1.6015160523186682e-06, |
|
"loss": 2.0389, |
|
"step": 521000 |
|
}, |
|
{ |
|
"epoch": 19.377972651605234, |
|
"grad_norm": 8.384140968322754, |
|
"learning_rate": 1.5550683709869204e-06, |
|
"loss": 2.0367, |
|
"step": 521500 |
|
}, |
|
{ |
|
"epoch": 19.396551724137932, |
|
"grad_norm": 7.63324499130249, |
|
"learning_rate": 1.5086206896551726e-06, |
|
"loss": 2.0461, |
|
"step": 522000 |
|
}, |
|
{ |
|
"epoch": 19.41513079667063, |
|
"grad_norm": 6.784617900848389, |
|
"learning_rate": 1.4621730083234246e-06, |
|
"loss": 2.0453, |
|
"step": 522500 |
|
}, |
|
{ |
|
"epoch": 19.43370986920333, |
|
"grad_norm": 6.640545845031738, |
|
"learning_rate": 1.4157253269916766e-06, |
|
"loss": 2.0388, |
|
"step": 523000 |
|
}, |
|
{ |
|
"epoch": 19.45228894173603, |
|
"grad_norm": 6.723217487335205, |
|
"learning_rate": 1.3692776456599287e-06, |
|
"loss": 2.0414, |
|
"step": 523500 |
|
}, |
|
{ |
|
"epoch": 19.470868014268728, |
|
"grad_norm": 6.989643573760986, |
|
"learning_rate": 1.3228299643281807e-06, |
|
"loss": 2.055, |
|
"step": 524000 |
|
}, |
|
{ |
|
"epoch": 19.489447086801427, |
|
"grad_norm": 6.394150257110596, |
|
"learning_rate": 1.276382282996433e-06, |
|
"loss": 2.0539, |
|
"step": 524500 |
|
}, |
|
{ |
|
"epoch": 19.508026159334126, |
|
"grad_norm": 7.73260498046875, |
|
"learning_rate": 1.229934601664685e-06, |
|
"loss": 2.0549, |
|
"step": 525000 |
|
}, |
|
{ |
|
"epoch": 19.526605231866824, |
|
"grad_norm": 7.458393096923828, |
|
"learning_rate": 1.183486920332937e-06, |
|
"loss": 2.0486, |
|
"step": 525500 |
|
}, |
|
{ |
|
"epoch": 19.545184304399523, |
|
"grad_norm": 7.173522472381592, |
|
"learning_rate": 1.137039239001189e-06, |
|
"loss": 2.0644, |
|
"step": 526000 |
|
}, |
|
{ |
|
"epoch": 19.563763376932222, |
|
"grad_norm": 7.556340217590332, |
|
"learning_rate": 1.0905915576694412e-06, |
|
"loss": 2.0456, |
|
"step": 526500 |
|
}, |
|
{ |
|
"epoch": 19.582342449464925, |
|
"grad_norm": 8.111367225646973, |
|
"learning_rate": 1.0441438763376932e-06, |
|
"loss": 2.0397, |
|
"step": 527000 |
|
}, |
|
{ |
|
"epoch": 19.600921521997623, |
|
"grad_norm": 6.623202323913574, |
|
"learning_rate": 9.976961950059454e-07, |
|
"loss": 2.0488, |
|
"step": 527500 |
|
}, |
|
{ |
|
"epoch": 19.619500594530322, |
|
"grad_norm": 7.327664375305176, |
|
"learning_rate": 9.512485136741974e-07, |
|
"loss": 2.037, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 19.63807966706302, |
|
"grad_norm": 7.518941879272461, |
|
"learning_rate": 9.048008323424495e-07, |
|
"loss": 2.0694, |
|
"step": 528500 |
|
}, |
|
{ |
|
"epoch": 19.65665873959572, |
|
"grad_norm": 7.00496244430542, |
|
"learning_rate": 8.583531510107016e-07, |
|
"loss": 2.0459, |
|
"step": 529000 |
|
}, |
|
{ |
|
"epoch": 19.67523781212842, |
|
"grad_norm": 7.160311222076416, |
|
"learning_rate": 8.119054696789537e-07, |
|
"loss": 2.029, |
|
"step": 529500 |
|
}, |
|
{ |
|
"epoch": 19.693816884661118, |
|
"grad_norm": 7.951440811157227, |
|
"learning_rate": 7.654577883472057e-07, |
|
"loss": 2.0529, |
|
"step": 530000 |
|
}, |
|
{ |
|
"epoch": 19.712395957193817, |
|
"grad_norm": 7.71318244934082, |
|
"learning_rate": 7.190101070154579e-07, |
|
"loss": 2.057, |
|
"step": 530500 |
|
}, |
|
{ |
|
"epoch": 19.730975029726515, |
|
"grad_norm": 7.5362043380737305, |
|
"learning_rate": 6.7256242568371e-07, |
|
"loss": 2.0493, |
|
"step": 531000 |
|
}, |
|
{ |
|
"epoch": 19.749554102259214, |
|
"grad_norm": 8.362653732299805, |
|
"learning_rate": 6.261147443519619e-07, |
|
"loss": 2.0467, |
|
"step": 531500 |
|
}, |
|
{ |
|
"epoch": 19.768133174791913, |
|
"grad_norm": 7.16049337387085, |
|
"learning_rate": 5.79667063020214e-07, |
|
"loss": 2.0346, |
|
"step": 532000 |
|
}, |
|
{ |
|
"epoch": 19.786712247324612, |
|
"grad_norm": 7.634875297546387, |
|
"learning_rate": 5.332193816884662e-07, |
|
"loss": 2.035, |
|
"step": 532500 |
|
}, |
|
{ |
|
"epoch": 19.805291319857314, |
|
"grad_norm": 7.416409015655518, |
|
"learning_rate": 4.867717003567182e-07, |
|
"loss": 2.0513, |
|
"step": 533000 |
|
}, |
|
{ |
|
"epoch": 19.823870392390013, |
|
"grad_norm": 6.575763702392578, |
|
"learning_rate": 4.4032401902497025e-07, |
|
"loss": 2.0582, |
|
"step": 533500 |
|
}, |
|
{ |
|
"epoch": 19.842449464922712, |
|
"grad_norm": 7.2025909423828125, |
|
"learning_rate": 3.938763376932224e-07, |
|
"loss": 2.0534, |
|
"step": 534000 |
|
}, |
|
{ |
|
"epoch": 19.86102853745541, |
|
"grad_norm": 7.560851573944092, |
|
"learning_rate": 3.4742865636147446e-07, |
|
"loss": 2.0566, |
|
"step": 534500 |
|
}, |
|
{ |
|
"epoch": 19.87960760998811, |
|
"grad_norm": 7.525179386138916, |
|
"learning_rate": 3.0098097502972654e-07, |
|
"loss": 2.0374, |
|
"step": 535000 |
|
}, |
|
{ |
|
"epoch": 19.89818668252081, |
|
"grad_norm": 6.616377830505371, |
|
"learning_rate": 2.5453329369797857e-07, |
|
"loss": 2.0483, |
|
"step": 535500 |
|
}, |
|
{ |
|
"epoch": 19.916765755053508, |
|
"grad_norm": 7.127399444580078, |
|
"learning_rate": 2.0808561236623068e-07, |
|
"loss": 2.0612, |
|
"step": 536000 |
|
}, |
|
{ |
|
"epoch": 19.935344827586206, |
|
"grad_norm": 8.101058959960938, |
|
"learning_rate": 1.6163793103448276e-07, |
|
"loss": 2.046, |
|
"step": 536500 |
|
}, |
|
{ |
|
"epoch": 19.953923900118905, |
|
"grad_norm": 7.135190010070801, |
|
"learning_rate": 1.1519024970273484e-07, |
|
"loss": 2.0619, |
|
"step": 537000 |
|
}, |
|
{ |
|
"epoch": 19.972502972651604, |
|
"grad_norm": 7.481634616851807, |
|
"learning_rate": 6.874256837098692e-08, |
|
"loss": 2.0529, |
|
"step": 537500 |
|
}, |
|
{ |
|
"epoch": 19.991082045184303, |
|
"grad_norm": 7.486691474914551, |
|
"learning_rate": 2.2294887039239002e-08, |
|
"loss": 2.0536, |
|
"step": 538000 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 538240, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 200, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7.258410922006282e+17, |
|
"train_batch_size": 46, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|