|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.9967396367023755, |
|
"eval_steps": 500, |
|
"global_step": 4292, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.004657661853749418, |
|
"grad_norm": 6.92752168275594, |
|
"learning_rate": 5.813953488372093e-07, |
|
"loss": 0.8029, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.009315323707498836, |
|
"grad_norm": 3.1206366140040105, |
|
"learning_rate": 1.1627906976744186e-06, |
|
"loss": 0.7421, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.013972985561248253, |
|
"grad_norm": 1.9977796339339233, |
|
"learning_rate": 1.744186046511628e-06, |
|
"loss": 0.6853, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.018630647414997672, |
|
"grad_norm": 2.4072474593437447, |
|
"learning_rate": 2.325581395348837e-06, |
|
"loss": 0.6596, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02328830926874709, |
|
"grad_norm": 1.1116914304374472, |
|
"learning_rate": 2.9069767441860468e-06, |
|
"loss": 0.6228, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.027945971122496506, |
|
"grad_norm": 0.9277881373299325, |
|
"learning_rate": 3.488372093023256e-06, |
|
"loss": 0.6003, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.032603632976245925, |
|
"grad_norm": 0.7408949966084368, |
|
"learning_rate": 4.0697674418604655e-06, |
|
"loss": 0.5837, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.037261294829995344, |
|
"grad_norm": 0.7107225566304457, |
|
"learning_rate": 4.651162790697674e-06, |
|
"loss": 0.5644, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.04191895668374476, |
|
"grad_norm": 0.5848145784293148, |
|
"learning_rate": 5.232558139534884e-06, |
|
"loss": 0.5269, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.04657661853749418, |
|
"grad_norm": 0.5140529676345665, |
|
"learning_rate": 5.8139534883720935e-06, |
|
"loss": 0.5285, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.05123428039124359, |
|
"grad_norm": 0.4961933729423096, |
|
"learning_rate": 6.395348837209303e-06, |
|
"loss": 0.5164, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.05589194224499301, |
|
"grad_norm": 0.6027754913232912, |
|
"learning_rate": 6.976744186046512e-06, |
|
"loss": 0.5123, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.06054960409874243, |
|
"grad_norm": 0.5112041376492991, |
|
"learning_rate": 7.558139534883721e-06, |
|
"loss": 0.5051, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.06520726595249185, |
|
"grad_norm": 0.5749336250262522, |
|
"learning_rate": 8.139534883720931e-06, |
|
"loss": 0.4849, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.06986492780624126, |
|
"grad_norm": 0.6201245684634927, |
|
"learning_rate": 8.72093023255814e-06, |
|
"loss": 0.4873, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.07452258965999069, |
|
"grad_norm": 0.5673174922712911, |
|
"learning_rate": 9.302325581395349e-06, |
|
"loss": 0.4987, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.0791802515137401, |
|
"grad_norm": 0.5262178696818207, |
|
"learning_rate": 9.883720930232558e-06, |
|
"loss": 0.4779, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.08383791336748952, |
|
"grad_norm": 0.5695886644689018, |
|
"learning_rate": 1.0465116279069768e-05, |
|
"loss": 0.4848, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.08849557522123894, |
|
"grad_norm": 0.565733598911647, |
|
"learning_rate": 1.1046511627906977e-05, |
|
"loss": 0.4899, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.09315323707498836, |
|
"grad_norm": 0.5859263883507883, |
|
"learning_rate": 1.1627906976744187e-05, |
|
"loss": 0.4808, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.09781089892873777, |
|
"grad_norm": 0.7262514612828005, |
|
"learning_rate": 1.2209302325581395e-05, |
|
"loss": 0.4838, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.10246856078248719, |
|
"grad_norm": 0.614430086368894, |
|
"learning_rate": 1.2790697674418606e-05, |
|
"loss": 0.4728, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.10712622263623661, |
|
"grad_norm": 0.6018869957383723, |
|
"learning_rate": 1.3372093023255814e-05, |
|
"loss": 0.473, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.11178388448998602, |
|
"grad_norm": 0.6977395666291641, |
|
"learning_rate": 1.3953488372093024e-05, |
|
"loss": 0.4844, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.11644154634373545, |
|
"grad_norm": 0.6403151057589088, |
|
"learning_rate": 1.4534883720930233e-05, |
|
"loss": 0.4652, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.12109920819748486, |
|
"grad_norm": 0.6871710421028616, |
|
"learning_rate": 1.5116279069767441e-05, |
|
"loss": 0.468, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.1257568700512343, |
|
"grad_norm": 0.6015122476741315, |
|
"learning_rate": 1.569767441860465e-05, |
|
"loss": 0.4752, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.1304145319049837, |
|
"grad_norm": 0.7033847892042502, |
|
"learning_rate": 1.6279069767441862e-05, |
|
"loss": 0.4692, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.1350721937587331, |
|
"grad_norm": 0.8170324351792959, |
|
"learning_rate": 1.686046511627907e-05, |
|
"loss": 0.4792, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.13972985561248252, |
|
"grad_norm": 0.7719634478496668, |
|
"learning_rate": 1.744186046511628e-05, |
|
"loss": 0.4739, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.14438751746623196, |
|
"grad_norm": 0.8600962139069631, |
|
"learning_rate": 1.802325581395349e-05, |
|
"loss": 0.4566, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.14904517931998137, |
|
"grad_norm": 0.8190039066561372, |
|
"learning_rate": 1.8604651162790697e-05, |
|
"loss": 0.4636, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.1537028411737308, |
|
"grad_norm": 0.7921799218082787, |
|
"learning_rate": 1.918604651162791e-05, |
|
"loss": 0.4648, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.1583605030274802, |
|
"grad_norm": 0.9330951643065268, |
|
"learning_rate": 1.9767441860465116e-05, |
|
"loss": 0.4631, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.1630181648812296, |
|
"grad_norm": 0.6469151250901515, |
|
"learning_rate": 2.0348837209302328e-05, |
|
"loss": 0.4742, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.16767582673497905, |
|
"grad_norm": 0.7444083364826932, |
|
"learning_rate": 2.0930232558139536e-05, |
|
"loss": 0.4694, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.17233348858872846, |
|
"grad_norm": 0.9053753262064802, |
|
"learning_rate": 2.1511627906976744e-05, |
|
"loss": 0.4679, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.17699115044247787, |
|
"grad_norm": 0.696915456370512, |
|
"learning_rate": 2.2093023255813955e-05, |
|
"loss": 0.4617, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.18164881229622729, |
|
"grad_norm": 0.7765718674340899, |
|
"learning_rate": 2.2674418604651163e-05, |
|
"loss": 0.4677, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.18630647414997673, |
|
"grad_norm": 0.5706209115733197, |
|
"learning_rate": 2.3255813953488374e-05, |
|
"loss": 0.4422, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.19096413600372614, |
|
"grad_norm": 0.8866665224464126, |
|
"learning_rate": 2.3837209302325582e-05, |
|
"loss": 0.4634, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.19562179785747555, |
|
"grad_norm": 0.6314129248025072, |
|
"learning_rate": 2.441860465116279e-05, |
|
"loss": 0.453, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.20027945971122496, |
|
"grad_norm": 0.7431495556273111, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.4641, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.20493712156497437, |
|
"grad_norm": 0.5892331057656331, |
|
"learning_rate": 2.5581395348837212e-05, |
|
"loss": 0.4604, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.2095947834187238, |
|
"grad_norm": 0.6573016213315429, |
|
"learning_rate": 2.616279069767442e-05, |
|
"loss": 0.4634, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.21425244527247322, |
|
"grad_norm": 0.705234238344836, |
|
"learning_rate": 2.674418604651163e-05, |
|
"loss": 0.4618, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.21891010712622264, |
|
"grad_norm": 0.7161035621507849, |
|
"learning_rate": 2.7325581395348836e-05, |
|
"loss": 0.4573, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.22356776897997205, |
|
"grad_norm": 0.7220402302111609, |
|
"learning_rate": 2.7906976744186048e-05, |
|
"loss": 0.4566, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.22822543083372146, |
|
"grad_norm": 0.8317272534751984, |
|
"learning_rate": 2.848837209302326e-05, |
|
"loss": 0.448, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.2328830926874709, |
|
"grad_norm": 1.0116718049789115, |
|
"learning_rate": 2.9069767441860467e-05, |
|
"loss": 0.4463, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.2375407545412203, |
|
"grad_norm": 1.2520264875182285, |
|
"learning_rate": 2.9651162790697678e-05, |
|
"loss": 0.4572, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.24219841639496972, |
|
"grad_norm": 0.6273556563905193, |
|
"learning_rate": 3.0232558139534883e-05, |
|
"loss": 0.4611, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.24685607824871914, |
|
"grad_norm": 0.7803028876004741, |
|
"learning_rate": 3.081395348837209e-05, |
|
"loss": 0.4557, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.2515137401024686, |
|
"grad_norm": 0.7348185111841634, |
|
"learning_rate": 3.13953488372093e-05, |
|
"loss": 0.4459, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.25617140195621796, |
|
"grad_norm": 0.5907461262762532, |
|
"learning_rate": 3.197674418604651e-05, |
|
"loss": 0.4619, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.2608290638099674, |
|
"grad_norm": 0.966120981626802, |
|
"learning_rate": 3.2558139534883724e-05, |
|
"loss": 0.4505, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.26548672566371684, |
|
"grad_norm": 1.0403496213648828, |
|
"learning_rate": 3.313953488372093e-05, |
|
"loss": 0.4513, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.2701443875174662, |
|
"grad_norm": 1.2436057586513931, |
|
"learning_rate": 3.372093023255814e-05, |
|
"loss": 0.452, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.27480204937121566, |
|
"grad_norm": 2.3331539482866757, |
|
"learning_rate": 3.430232558139535e-05, |
|
"loss": 0.4451, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.27945971122496505, |
|
"grad_norm": 1.0379481464446643, |
|
"learning_rate": 3.488372093023256e-05, |
|
"loss": 0.4573, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.2841173730787145, |
|
"grad_norm": 3.6031638667280483, |
|
"learning_rate": 3.5465116279069774e-05, |
|
"loss": 0.4601, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.2887750349324639, |
|
"grad_norm": 0.6833415227247001, |
|
"learning_rate": 3.604651162790698e-05, |
|
"loss": 0.4558, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.2934326967862133, |
|
"grad_norm": 0.8639270782587997, |
|
"learning_rate": 3.662790697674418e-05, |
|
"loss": 0.4546, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.29809035863996275, |
|
"grad_norm": 1.8215303829903469, |
|
"learning_rate": 3.7209302325581394e-05, |
|
"loss": 0.4634, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.30274802049371213, |
|
"grad_norm": 1.2592473661230983, |
|
"learning_rate": 3.7790697674418606e-05, |
|
"loss": 0.4672, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.3074056823474616, |
|
"grad_norm": 1.1786650053013983, |
|
"learning_rate": 3.837209302325582e-05, |
|
"loss": 0.4573, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.312063344201211, |
|
"grad_norm": 0.9018705956781983, |
|
"learning_rate": 3.895348837209303e-05, |
|
"loss": 0.4428, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.3167210060549604, |
|
"grad_norm": 0.8940848456165655, |
|
"learning_rate": 3.953488372093023e-05, |
|
"loss": 0.4548, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.32137866790870984, |
|
"grad_norm": 0.8345182625802969, |
|
"learning_rate": 4.0116279069767444e-05, |
|
"loss": 0.4453, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.3260363297624592, |
|
"grad_norm": 0.7788858852128394, |
|
"learning_rate": 4.0697674418604655e-05, |
|
"loss": 0.4497, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.33069399161620866, |
|
"grad_norm": 2.835448275262579, |
|
"learning_rate": 4.127906976744187e-05, |
|
"loss": 0.469, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.3353516534699581, |
|
"grad_norm": 1.921513390569175, |
|
"learning_rate": 4.186046511627907e-05, |
|
"loss": 0.4716, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.3400093153237075, |
|
"grad_norm": 1.1947987859585383, |
|
"learning_rate": 4.2441860465116276e-05, |
|
"loss": 0.4624, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.3446669771774569, |
|
"grad_norm": 0.9700341651993428, |
|
"learning_rate": 4.302325581395349e-05, |
|
"loss": 0.4661, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.3493246390312063, |
|
"grad_norm": 0.8969571742098159, |
|
"learning_rate": 4.36046511627907e-05, |
|
"loss": 0.4627, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.35398230088495575, |
|
"grad_norm": 0.6881782636935027, |
|
"learning_rate": 4.418604651162791e-05, |
|
"loss": 0.4468, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.3586399627387052, |
|
"grad_norm": 0.8164164954054413, |
|
"learning_rate": 4.476744186046512e-05, |
|
"loss": 0.4422, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.36329762459245457, |
|
"grad_norm": 0.7087648580913801, |
|
"learning_rate": 4.5348837209302326e-05, |
|
"loss": 0.4474, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.367955286446204, |
|
"grad_norm": 0.7023296376912398, |
|
"learning_rate": 4.593023255813954e-05, |
|
"loss": 0.4424, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.37261294829995345, |
|
"grad_norm": 0.8333076016425238, |
|
"learning_rate": 4.651162790697675e-05, |
|
"loss": 0.4414, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.37727061015370283, |
|
"grad_norm": 3.1226077583229617, |
|
"learning_rate": 4.709302325581396e-05, |
|
"loss": 0.4464, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.3819282720074523, |
|
"grad_norm": 3.1795716272396377, |
|
"learning_rate": 4.7674418604651164e-05, |
|
"loss": 0.4659, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.38658593386120166, |
|
"grad_norm": 1.2388415204725036, |
|
"learning_rate": 4.8255813953488375e-05, |
|
"loss": 0.4707, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.3912435957149511, |
|
"grad_norm": 11.6644368029688, |
|
"learning_rate": 4.883720930232558e-05, |
|
"loss": 0.4515, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.39590125756870054, |
|
"grad_norm": 113.48820494896151, |
|
"learning_rate": 4.941860465116279e-05, |
|
"loss": 0.4821, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.4005589194224499, |
|
"grad_norm": 1.0083170302325644, |
|
"learning_rate": 5e-05, |
|
"loss": 0.4793, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.40521658127619936, |
|
"grad_norm": 0.8343252729292476, |
|
"learning_rate": 4.9935266701191095e-05, |
|
"loss": 0.4626, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.40987424312994875, |
|
"grad_norm": 1.3705818840350565, |
|
"learning_rate": 4.987053340238219e-05, |
|
"loss": 0.4755, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.4145319049836982, |
|
"grad_norm": 0.9134655651221893, |
|
"learning_rate": 4.980580010357328e-05, |
|
"loss": 0.4525, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.4191895668374476, |
|
"grad_norm": 0.8116030121985857, |
|
"learning_rate": 4.9741066804764374e-05, |
|
"loss": 0.4514, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.423847228691197, |
|
"grad_norm": 0.9800010870367113, |
|
"learning_rate": 4.967633350595546e-05, |
|
"loss": 0.4684, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.42850489054494645, |
|
"grad_norm": 0.6033357918612593, |
|
"learning_rate": 4.961160020714656e-05, |
|
"loss": 0.448, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.43316255239869583, |
|
"grad_norm": 0.7024480175816733, |
|
"learning_rate": 4.954686690833765e-05, |
|
"loss": 0.4561, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.43782021425244527, |
|
"grad_norm": 0.9064312821375221, |
|
"learning_rate": 4.948213360952874e-05, |
|
"loss": 0.4492, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.4424778761061947, |
|
"grad_norm": 0.7172332562259943, |
|
"learning_rate": 4.941740031071983e-05, |
|
"loss": 0.4579, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.4471355379599441, |
|
"grad_norm": 0.5153145090485904, |
|
"learning_rate": 4.935266701191093e-05, |
|
"loss": 0.443, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.45179319981369354, |
|
"grad_norm": 0.649992420910321, |
|
"learning_rate": 4.9287933713102025e-05, |
|
"loss": 0.4449, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.4564508616674429, |
|
"grad_norm": 0.7128785726809049, |
|
"learning_rate": 4.922320041429311e-05, |
|
"loss": 0.449, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.46110852352119236, |
|
"grad_norm": 0.518743601600328, |
|
"learning_rate": 4.915846711548421e-05, |
|
"loss": 0.4413, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.4657661853749418, |
|
"grad_norm": 0.7520128926024633, |
|
"learning_rate": 4.9093733816675304e-05, |
|
"loss": 0.4483, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.4704238472286912, |
|
"grad_norm": 1.0686968926245148, |
|
"learning_rate": 4.902900051786639e-05, |
|
"loss": 0.4598, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.4750815090824406, |
|
"grad_norm": 1.0997940553078784, |
|
"learning_rate": 4.8964267219057483e-05, |
|
"loss": 0.4433, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.47973917093619, |
|
"grad_norm": 0.9644379037478946, |
|
"learning_rate": 4.889953392024858e-05, |
|
"loss": 0.4508, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.48439683278993945, |
|
"grad_norm": 0.7240877417772227, |
|
"learning_rate": 4.883480062143967e-05, |
|
"loss": 0.4465, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.4890544946436889, |
|
"grad_norm": 0.6057475216698976, |
|
"learning_rate": 4.877006732263076e-05, |
|
"loss": 0.4538, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.49371215649743827, |
|
"grad_norm": 1.0805852332397097, |
|
"learning_rate": 4.8705334023821855e-05, |
|
"loss": 0.441, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.4983698183511877, |
|
"grad_norm": 0.7101163390203344, |
|
"learning_rate": 4.864060072501295e-05, |
|
"loss": 0.4474, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.5030274802049371, |
|
"grad_norm": 0.5607989501155178, |
|
"learning_rate": 4.857586742620404e-05, |
|
"loss": 0.4498, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.5076851420586865, |
|
"grad_norm": 0.7141168081675577, |
|
"learning_rate": 4.8511134127395134e-05, |
|
"loss": 0.4398, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.5123428039124359, |
|
"grad_norm": 0.6293431988298551, |
|
"learning_rate": 4.844640082858623e-05, |
|
"loss": 0.4536, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.5170004657661854, |
|
"grad_norm": 0.6257261340859561, |
|
"learning_rate": 4.838166752977732e-05, |
|
"loss": 0.4418, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.5216581276199348, |
|
"grad_norm": 0.6232117352293421, |
|
"learning_rate": 4.831693423096841e-05, |
|
"loss": 0.4466, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.5263157894736842, |
|
"grad_norm": 0.5876801848079608, |
|
"learning_rate": 4.82522009321595e-05, |
|
"loss": 0.4449, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.5309734513274337, |
|
"grad_norm": 0.6227366864183466, |
|
"learning_rate": 4.81874676333506e-05, |
|
"loss": 0.4442, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.5356311131811831, |
|
"grad_norm": 0.5475728503047741, |
|
"learning_rate": 4.812273433454169e-05, |
|
"loss": 0.4412, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.5402887750349324, |
|
"grad_norm": 0.5077825170970056, |
|
"learning_rate": 4.8058001035732785e-05, |
|
"loss": 0.4411, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.5449464368886818, |
|
"grad_norm": 0.7313103597196117, |
|
"learning_rate": 4.799326773692387e-05, |
|
"loss": 0.438, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.5496040987424313, |
|
"grad_norm": 0.6058579101984163, |
|
"learning_rate": 4.792853443811497e-05, |
|
"loss": 0.4528, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.5542617605961807, |
|
"grad_norm": 0.7064967055994522, |
|
"learning_rate": 4.7863801139306064e-05, |
|
"loss": 0.4435, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.5589194224499301, |
|
"grad_norm": 0.6236154362787276, |
|
"learning_rate": 4.779906784049715e-05, |
|
"loss": 0.4488, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.5635770843036796, |
|
"grad_norm": 0.7252402093905195, |
|
"learning_rate": 4.773433454168825e-05, |
|
"loss": 0.4248, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.568234746157429, |
|
"grad_norm": 0.668295888260378, |
|
"learning_rate": 4.766960124287934e-05, |
|
"loss": 0.4422, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.5728924080111784, |
|
"grad_norm": 0.6155123020308337, |
|
"learning_rate": 4.760486794407043e-05, |
|
"loss": 0.4348, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.5775500698649279, |
|
"grad_norm": 0.594045014249402, |
|
"learning_rate": 4.754013464526152e-05, |
|
"loss": 0.4377, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.5822077317186772, |
|
"grad_norm": 0.9764084282981423, |
|
"learning_rate": 4.747540134645262e-05, |
|
"loss": 0.448, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.5868653935724266, |
|
"grad_norm": 0.7417553163037858, |
|
"learning_rate": 4.741066804764371e-05, |
|
"loss": 0.4353, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.5915230554261761, |
|
"grad_norm": 0.6844689847415286, |
|
"learning_rate": 4.73459347488348e-05, |
|
"loss": 0.4343, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.5961807172799255, |
|
"grad_norm": 0.5636372991521233, |
|
"learning_rate": 4.7281201450025894e-05, |
|
"loss": 0.439, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.6008383791336749, |
|
"grad_norm": 0.7487968219317778, |
|
"learning_rate": 4.721646815121699e-05, |
|
"loss": 0.4384, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.6054960409874243, |
|
"grad_norm": 0.6545338432685719, |
|
"learning_rate": 4.715173485240808e-05, |
|
"loss": 0.4365, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.6101537028411738, |
|
"grad_norm": 0.5457324276584636, |
|
"learning_rate": 4.708700155359917e-05, |
|
"loss": 0.4423, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.6148113646949231, |
|
"grad_norm": 0.6673827973910538, |
|
"learning_rate": 4.7022268254790266e-05, |
|
"loss": 0.443, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.6194690265486725, |
|
"grad_norm": 0.9349454934006193, |
|
"learning_rate": 4.695753495598136e-05, |
|
"loss": 0.4383, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.624126688402422, |
|
"grad_norm": 0.63544553454262, |
|
"learning_rate": 4.689280165717245e-05, |
|
"loss": 0.441, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.6287843502561714, |
|
"grad_norm": 0.5837923413074558, |
|
"learning_rate": 4.6828068358363545e-05, |
|
"loss": 0.4328, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.6334420121099208, |
|
"grad_norm": 0.5320619946457391, |
|
"learning_rate": 4.676333505955464e-05, |
|
"loss": 0.4366, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.6380996739636703, |
|
"grad_norm": 0.605917494319971, |
|
"learning_rate": 4.669860176074573e-05, |
|
"loss": 0.4474, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.6427573358174197, |
|
"grad_norm": 0.6653824724316433, |
|
"learning_rate": 4.6633868461936824e-05, |
|
"loss": 0.4389, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.6474149976711691, |
|
"grad_norm": 0.5860908364136305, |
|
"learning_rate": 4.656913516312791e-05, |
|
"loss": 0.4369, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.6520726595249184, |
|
"grad_norm": 0.5966356195762597, |
|
"learning_rate": 4.650440186431901e-05, |
|
"loss": 0.4341, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.6567303213786679, |
|
"grad_norm": 0.564451568252952, |
|
"learning_rate": 4.64396685655101e-05, |
|
"loss": 0.4314, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.6613879832324173, |
|
"grad_norm": 0.7035115996231885, |
|
"learning_rate": 4.637493526670119e-05, |
|
"loss": 0.4429, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.6660456450861667, |
|
"grad_norm": 0.5778739558938046, |
|
"learning_rate": 4.631020196789229e-05, |
|
"loss": 0.4369, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.6707033069399162, |
|
"grad_norm": 0.8934302769291517, |
|
"learning_rate": 4.624546866908338e-05, |
|
"loss": 0.4288, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.6753609687936656, |
|
"grad_norm": 0.734087332564624, |
|
"learning_rate": 4.618073537027447e-05, |
|
"loss": 0.4337, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.680018630647415, |
|
"grad_norm": 0.7048613981585907, |
|
"learning_rate": 4.611600207146556e-05, |
|
"loss": 0.4399, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.6846762925011645, |
|
"grad_norm": 0.5604335558697083, |
|
"learning_rate": 4.605126877265666e-05, |
|
"loss": 0.4287, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.6893339543549138, |
|
"grad_norm": 0.4872919097245967, |
|
"learning_rate": 4.598653547384775e-05, |
|
"loss": 0.4333, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.6939916162086632, |
|
"grad_norm": 0.6669275046240741, |
|
"learning_rate": 4.592180217503884e-05, |
|
"loss": 0.4304, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.6986492780624126, |
|
"grad_norm": 0.630044864392535, |
|
"learning_rate": 4.585706887622993e-05, |
|
"loss": 0.4364, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.7033069399161621, |
|
"grad_norm": 0.6450622968857808, |
|
"learning_rate": 4.5792335577421026e-05, |
|
"loss": 0.4264, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.7079646017699115, |
|
"grad_norm": 0.6200426709320188, |
|
"learning_rate": 4.572760227861212e-05, |
|
"loss": 0.4292, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.7126222636236609, |
|
"grad_norm": 0.5777601231289204, |
|
"learning_rate": 4.566286897980321e-05, |
|
"loss": 0.437, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.7172799254774104, |
|
"grad_norm": 0.6569186197655689, |
|
"learning_rate": 4.5598135680994305e-05, |
|
"loss": 0.4362, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.7219375873311598, |
|
"grad_norm": 0.5598715684009743, |
|
"learning_rate": 4.55334023821854e-05, |
|
"loss": 0.4252, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.7265952491849091, |
|
"grad_norm": 0.6869231997726726, |
|
"learning_rate": 4.546866908337649e-05, |
|
"loss": 0.4408, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.7312529110386586, |
|
"grad_norm": 0.5837022632387678, |
|
"learning_rate": 4.5403935784567584e-05, |
|
"loss": 0.4342, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.735910572892408, |
|
"grad_norm": 0.6124526257152769, |
|
"learning_rate": 4.533920248575868e-05, |
|
"loss": 0.4262, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.7405682347461574, |
|
"grad_norm": 0.6724737198480268, |
|
"learning_rate": 4.527446918694977e-05, |
|
"loss": 0.4328, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.7452258965999069, |
|
"grad_norm": 0.5255076953476321, |
|
"learning_rate": 4.520973588814086e-05, |
|
"loss": 0.4286, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.7498835584536563, |
|
"grad_norm": 0.5944855149791919, |
|
"learning_rate": 4.5145002589331956e-05, |
|
"loss": 0.4435, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.7545412203074057, |
|
"grad_norm": 0.8048901594295268, |
|
"learning_rate": 4.508026929052305e-05, |
|
"loss": 0.4407, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.759198882161155, |
|
"grad_norm": 0.6454599524510284, |
|
"learning_rate": 4.501553599171414e-05, |
|
"loss": 0.4327, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.7638565440149045, |
|
"grad_norm": 0.5072677303046597, |
|
"learning_rate": 4.495080269290523e-05, |
|
"loss": 0.4319, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.7685142058686539, |
|
"grad_norm": 0.5559797354764922, |
|
"learning_rate": 4.488606939409633e-05, |
|
"loss": 0.4206, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.7731718677224033, |
|
"grad_norm": 0.5345595601661192, |
|
"learning_rate": 4.482133609528742e-05, |
|
"loss": 0.4274, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.7778295295761528, |
|
"grad_norm": 0.7547024968931794, |
|
"learning_rate": 4.475660279647851e-05, |
|
"loss": 0.4358, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.7824871914299022, |
|
"grad_norm": 0.6227865373803352, |
|
"learning_rate": 4.46918694976696e-05, |
|
"loss": 0.4212, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.7871448532836516, |
|
"grad_norm": 0.7744646863428748, |
|
"learning_rate": 4.46271361988607e-05, |
|
"loss": 0.4266, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.7918025151374011, |
|
"grad_norm": 0.7178370613933127, |
|
"learning_rate": 4.4562402900051786e-05, |
|
"loss": 0.4286, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.7964601769911505, |
|
"grad_norm": 0.6045256278401768, |
|
"learning_rate": 4.449766960124288e-05, |
|
"loss": 0.4209, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.8011178388448998, |
|
"grad_norm": 0.6908919040235558, |
|
"learning_rate": 4.443293630243397e-05, |
|
"loss": 0.4232, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.8057755006986492, |
|
"grad_norm": 0.607340379361927, |
|
"learning_rate": 4.436820300362507e-05, |
|
"loss": 0.4258, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.8104331625523987, |
|
"grad_norm": 0.5338338967801528, |
|
"learning_rate": 4.430346970481616e-05, |
|
"loss": 0.4204, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.8150908244061481, |
|
"grad_norm": 0.4097730121893453, |
|
"learning_rate": 4.423873640600725e-05, |
|
"loss": 0.4295, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.8197484862598975, |
|
"grad_norm": 0.41680877490809976, |
|
"learning_rate": 4.4174003107198344e-05, |
|
"loss": 0.4141, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.824406148113647, |
|
"grad_norm": 0.45846922466853185, |
|
"learning_rate": 4.410926980838944e-05, |
|
"loss": 0.4293, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.8290638099673964, |
|
"grad_norm": 0.624287985275869, |
|
"learning_rate": 4.404453650958053e-05, |
|
"loss": 0.4276, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.8337214718211458, |
|
"grad_norm": 0.6427305945113978, |
|
"learning_rate": 4.397980321077162e-05, |
|
"loss": 0.4285, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.8383791336748952, |
|
"grad_norm": 0.656722812459809, |
|
"learning_rate": 4.3915069911962716e-05, |
|
"loss": 0.4235, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.8430367955286446, |
|
"grad_norm": 0.5762150457354466, |
|
"learning_rate": 4.385033661315381e-05, |
|
"loss": 0.4262, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.847694457382394, |
|
"grad_norm": 0.5465031708835169, |
|
"learning_rate": 4.37856033143449e-05, |
|
"loss": 0.4324, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.8523521192361434, |
|
"grad_norm": 0.5937693113190048, |
|
"learning_rate": 4.3720870015535995e-05, |
|
"loss": 0.424, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.8570097810898929, |
|
"grad_norm": 0.5935366518742683, |
|
"learning_rate": 4.365613671672709e-05, |
|
"loss": 0.4313, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.8616674429436423, |
|
"grad_norm": 0.6991248808211339, |
|
"learning_rate": 4.359140341791818e-05, |
|
"loss": 0.4267, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.8663251047973917, |
|
"grad_norm": 0.7346783269595882, |
|
"learning_rate": 4.352667011910927e-05, |
|
"loss": 0.4277, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.8709827666511412, |
|
"grad_norm": 0.5339138640718899, |
|
"learning_rate": 4.346193682030037e-05, |
|
"loss": 0.4156, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.8756404285048905, |
|
"grad_norm": 0.5509815538831199, |
|
"learning_rate": 4.339720352149146e-05, |
|
"loss": 0.416, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.8802980903586399, |
|
"grad_norm": 0.5245746762008456, |
|
"learning_rate": 4.3332470222682546e-05, |
|
"loss": 0.4197, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.8849557522123894, |
|
"grad_norm": 0.49232142300598086, |
|
"learning_rate": 4.326773692387364e-05, |
|
"loss": 0.4094, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.8896134140661388, |
|
"grad_norm": 0.5051062616308978, |
|
"learning_rate": 4.320300362506474e-05, |
|
"loss": 0.422, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.8942710759198882, |
|
"grad_norm": 0.7687269777401123, |
|
"learning_rate": 4.313827032625583e-05, |
|
"loss": 0.4276, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.8989287377736377, |
|
"grad_norm": 0.5572032044424499, |
|
"learning_rate": 4.307353702744692e-05, |
|
"loss": 0.4223, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.9035863996273871, |
|
"grad_norm": 0.5643322714638882, |
|
"learning_rate": 4.300880372863801e-05, |
|
"loss": 0.4259, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.9082440614811365, |
|
"grad_norm": 0.5250152051480017, |
|
"learning_rate": 4.294407042982911e-05, |
|
"loss": 0.4203, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.9129017233348858, |
|
"grad_norm": 1.775669542950946, |
|
"learning_rate": 4.28793371310202e-05, |
|
"loss": 0.4235, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.9175593851886353, |
|
"grad_norm": 0.6989407506091581, |
|
"learning_rate": 4.281460383221129e-05, |
|
"loss": 0.4292, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.9222170470423847, |
|
"grad_norm": 1.0108097597477825, |
|
"learning_rate": 4.274987053340238e-05, |
|
"loss": 0.4022, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.9268747088961341, |
|
"grad_norm": 0.6870512928826218, |
|
"learning_rate": 4.2685137234593476e-05, |
|
"loss": 0.4175, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.9315323707498836, |
|
"grad_norm": 0.5676290103181388, |
|
"learning_rate": 4.262040393578457e-05, |
|
"loss": 0.4197, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.936190032603633, |
|
"grad_norm": 0.6052775213447519, |
|
"learning_rate": 4.255567063697566e-05, |
|
"loss": 0.4285, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.9408476944573824, |
|
"grad_norm": 0.48531586665515636, |
|
"learning_rate": 4.2490937338166755e-05, |
|
"loss": 0.417, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.9455053563111319, |
|
"grad_norm": 0.5661010529095274, |
|
"learning_rate": 4.242620403935785e-05, |
|
"loss": 0.4199, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.9501630181648812, |
|
"grad_norm": 0.6079831482890822, |
|
"learning_rate": 4.236147074054894e-05, |
|
"loss": 0.4278, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.9548206800186306, |
|
"grad_norm": 0.5746246985979553, |
|
"learning_rate": 4.2296737441740034e-05, |
|
"loss": 0.4201, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.95947834187238, |
|
"grad_norm": 0.5884834814710899, |
|
"learning_rate": 4.223200414293113e-05, |
|
"loss": 0.412, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.9641360037261295, |
|
"grad_norm": 0.5304972411175706, |
|
"learning_rate": 4.216727084412222e-05, |
|
"loss": 0.4157, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.9687936655798789, |
|
"grad_norm": 0.7966837800227516, |
|
"learning_rate": 4.2102537545313306e-05, |
|
"loss": 0.4154, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.9734513274336283, |
|
"grad_norm": 0.8281870140369813, |
|
"learning_rate": 4.2037804246504406e-05, |
|
"loss": 0.4179, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.9781089892873778, |
|
"grad_norm": 0.5860620536296465, |
|
"learning_rate": 4.19730709476955e-05, |
|
"loss": 0.4177, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.9827666511411272, |
|
"grad_norm": 0.5890427048399142, |
|
"learning_rate": 4.190833764888659e-05, |
|
"loss": 0.4192, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.9874243129948765, |
|
"grad_norm": 0.5213412179370963, |
|
"learning_rate": 4.184360435007768e-05, |
|
"loss": 0.4222, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.992081974848626, |
|
"grad_norm": 0.7453282325289396, |
|
"learning_rate": 4.177887105126878e-05, |
|
"loss": 0.4189, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.9967396367023754, |
|
"grad_norm": 0.6133847478397344, |
|
"learning_rate": 4.171413775245987e-05, |
|
"loss": 0.4104, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.00093153237075, |
|
"grad_norm": 1.090678146779406, |
|
"learning_rate": 4.164940445365096e-05, |
|
"loss": 0.4209, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 1.0055891942244992, |
|
"grad_norm": 0.6095624445469302, |
|
"learning_rate": 4.158467115484205e-05, |
|
"loss": 0.3619, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.0102468560782487, |
|
"grad_norm": 0.5834527079795324, |
|
"learning_rate": 4.151993785603315e-05, |
|
"loss": 0.3498, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 1.0149045179319982, |
|
"grad_norm": 0.7818406149318912, |
|
"learning_rate": 4.1455204557224236e-05, |
|
"loss": 0.3651, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.0195621797857475, |
|
"grad_norm": 0.5600327410720667, |
|
"learning_rate": 4.139047125841533e-05, |
|
"loss": 0.3608, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 1.024219841639497, |
|
"grad_norm": 0.5373438690472865, |
|
"learning_rate": 4.132573795960642e-05, |
|
"loss": 0.3657, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.0288775034932465, |
|
"grad_norm": 0.4862220275338719, |
|
"learning_rate": 4.1261004660797515e-05, |
|
"loss": 0.3661, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 1.0335351653469957, |
|
"grad_norm": 0.5361122770568882, |
|
"learning_rate": 4.119627136198861e-05, |
|
"loss": 0.3635, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.0381928272007452, |
|
"grad_norm": 0.4625151982803019, |
|
"learning_rate": 4.11315380631797e-05, |
|
"loss": 0.3499, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 1.0428504890544947, |
|
"grad_norm": 0.6531186567479866, |
|
"learning_rate": 4.1066804764370794e-05, |
|
"loss": 0.3618, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.047508150908244, |
|
"grad_norm": 0.6272666912027586, |
|
"learning_rate": 4.100207146556189e-05, |
|
"loss": 0.3652, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 1.0521658127619935, |
|
"grad_norm": 0.5373624121001779, |
|
"learning_rate": 4.093733816675298e-05, |
|
"loss": 0.3494, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.056823474615743, |
|
"grad_norm": 0.4965004694353307, |
|
"learning_rate": 4.087260486794407e-05, |
|
"loss": 0.3596, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 1.0614811364694923, |
|
"grad_norm": 0.5285596102050132, |
|
"learning_rate": 4.0807871569135166e-05, |
|
"loss": 0.3606, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.0661387983232418, |
|
"grad_norm": 0.45339136110843775, |
|
"learning_rate": 4.074313827032626e-05, |
|
"loss": 0.3595, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 1.0707964601769913, |
|
"grad_norm": 0.47470548262310025, |
|
"learning_rate": 4.067840497151735e-05, |
|
"loss": 0.3552, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.0754541220307405, |
|
"grad_norm": 0.5222608731509295, |
|
"learning_rate": 4.0613671672708445e-05, |
|
"loss": 0.3581, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 1.08011178388449, |
|
"grad_norm": 0.5951129546363467, |
|
"learning_rate": 4.054893837389954e-05, |
|
"loss": 0.3653, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.0847694457382393, |
|
"grad_norm": 0.47330368757580166, |
|
"learning_rate": 4.048420507509063e-05, |
|
"loss": 0.3682, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 1.0894271075919888, |
|
"grad_norm": 0.6630467479912063, |
|
"learning_rate": 4.041947177628172e-05, |
|
"loss": 0.3641, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.0940847694457383, |
|
"grad_norm": 0.479092390372685, |
|
"learning_rate": 4.035473847747282e-05, |
|
"loss": 0.3636, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 1.0987424312994876, |
|
"grad_norm": 0.5347502580113855, |
|
"learning_rate": 4.029000517866391e-05, |
|
"loss": 0.3559, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.103400093153237, |
|
"grad_norm": 0.5580827812560017, |
|
"learning_rate": 4.0225271879854996e-05, |
|
"loss": 0.3572, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 1.1080577550069866, |
|
"grad_norm": 0.40826648699641077, |
|
"learning_rate": 4.016053858104609e-05, |
|
"loss": 0.3396, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.1127154168607358, |
|
"grad_norm": 0.4868993696437974, |
|
"learning_rate": 4.009580528223719e-05, |
|
"loss": 0.3481, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 1.1173730787144853, |
|
"grad_norm": 0.44958393221221327, |
|
"learning_rate": 4.0031071983428275e-05, |
|
"loss": 0.3574, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.1220307405682348, |
|
"grad_norm": 0.4529924774506899, |
|
"learning_rate": 3.996633868461937e-05, |
|
"loss": 0.366, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 1.126688402421984, |
|
"grad_norm": 0.45382106058655186, |
|
"learning_rate": 3.990160538581046e-05, |
|
"loss": 0.3611, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.1313460642757336, |
|
"grad_norm": 0.774011131965164, |
|
"learning_rate": 3.9836872087001554e-05, |
|
"loss": 0.3574, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 1.136003726129483, |
|
"grad_norm": 0.5708209085490166, |
|
"learning_rate": 3.977213878819265e-05, |
|
"loss": 0.3679, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.1406613879832324, |
|
"grad_norm": 0.5407084277053351, |
|
"learning_rate": 3.970740548938374e-05, |
|
"loss": 0.3602, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 1.1453190498369819, |
|
"grad_norm": 0.5607895201020598, |
|
"learning_rate": 3.964267219057483e-05, |
|
"loss": 0.3569, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 1.1499767116907313, |
|
"grad_norm": 0.5762919296213727, |
|
"learning_rate": 3.9577938891765926e-05, |
|
"loss": 0.3579, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 1.1546343735444806, |
|
"grad_norm": 0.4915060554356313, |
|
"learning_rate": 3.951320559295702e-05, |
|
"loss": 0.3631, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.1592920353982301, |
|
"grad_norm": 0.47937400758343035, |
|
"learning_rate": 3.944847229414811e-05, |
|
"loss": 0.3661, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 1.1639496972519794, |
|
"grad_norm": 0.5488280878505598, |
|
"learning_rate": 3.9383738995339205e-05, |
|
"loss": 0.369, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.1686073591057289, |
|
"grad_norm": 0.6567448003318919, |
|
"learning_rate": 3.93190056965303e-05, |
|
"loss": 0.3574, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 1.1732650209594784, |
|
"grad_norm": 0.5486815256330948, |
|
"learning_rate": 3.925427239772139e-05, |
|
"loss": 0.3679, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.1779226828132279, |
|
"grad_norm": 0.4616890041926306, |
|
"learning_rate": 3.9189539098912484e-05, |
|
"loss": 0.3579, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 1.1825803446669771, |
|
"grad_norm": 0.5392889454010211, |
|
"learning_rate": 3.912480580010358e-05, |
|
"loss": 0.3483, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.1872380065207266, |
|
"grad_norm": 0.4479725279421751, |
|
"learning_rate": 3.906007250129467e-05, |
|
"loss": 0.3587, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 1.191895668374476, |
|
"grad_norm": 0.7590029961366441, |
|
"learning_rate": 3.8995339202485756e-05, |
|
"loss": 0.3637, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 1.1965533302282254, |
|
"grad_norm": 0.6157692881026866, |
|
"learning_rate": 3.8930605903676856e-05, |
|
"loss": 0.3668, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 1.201210992081975, |
|
"grad_norm": 0.5380563110707145, |
|
"learning_rate": 3.886587260486795e-05, |
|
"loss": 0.358, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 1.2058686539357242, |
|
"grad_norm": 0.9956632223371663, |
|
"learning_rate": 3.8801139306059035e-05, |
|
"loss": 0.3593, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 1.2105263157894737, |
|
"grad_norm": 0.6675088366219036, |
|
"learning_rate": 3.873640600725013e-05, |
|
"loss": 0.358, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.2151839776432232, |
|
"grad_norm": 0.6151654966839248, |
|
"learning_rate": 3.867167270844123e-05, |
|
"loss": 0.3669, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 1.2198416394969724, |
|
"grad_norm": 0.46562609640922176, |
|
"learning_rate": 3.8606939409632314e-05, |
|
"loss": 0.3684, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 1.224499301350722, |
|
"grad_norm": 0.45769392273694903, |
|
"learning_rate": 3.854220611082341e-05, |
|
"loss": 0.3583, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 1.2291569632044714, |
|
"grad_norm": 0.4734611105934788, |
|
"learning_rate": 3.84774728120145e-05, |
|
"loss": 0.3673, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.2338146250582207, |
|
"grad_norm": 0.48854960034703904, |
|
"learning_rate": 3.841273951320559e-05, |
|
"loss": 0.3602, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 1.2384722869119702, |
|
"grad_norm": 0.491431877053754, |
|
"learning_rate": 3.8348006214396686e-05, |
|
"loss": 0.3659, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 1.2431299487657197, |
|
"grad_norm": 0.5080088344027641, |
|
"learning_rate": 3.828327291558778e-05, |
|
"loss": 0.3569, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 1.247787610619469, |
|
"grad_norm": 0.8013937265298156, |
|
"learning_rate": 3.821853961677888e-05, |
|
"loss": 0.3607, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.2524452724732185, |
|
"grad_norm": 0.4152250898013425, |
|
"learning_rate": 3.8153806317969965e-05, |
|
"loss": 0.353, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 1.257102934326968, |
|
"grad_norm": 0.5085970118735269, |
|
"learning_rate": 3.808907301916106e-05, |
|
"loss": 0.3589, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.2617605961807172, |
|
"grad_norm": 0.5551559795100467, |
|
"learning_rate": 3.802433972035215e-05, |
|
"loss": 0.356, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 1.2664182580344667, |
|
"grad_norm": 0.6050619124995583, |
|
"learning_rate": 3.7959606421543244e-05, |
|
"loss": 0.3495, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 1.271075919888216, |
|
"grad_norm": 0.3819757584386992, |
|
"learning_rate": 3.789487312273434e-05, |
|
"loss": 0.3612, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 1.2757335817419655, |
|
"grad_norm": 0.518505991982242, |
|
"learning_rate": 3.783013982392543e-05, |
|
"loss": 0.3606, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 1.280391243595715, |
|
"grad_norm": 0.489703544385954, |
|
"learning_rate": 3.776540652511652e-05, |
|
"loss": 0.3603, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 1.2850489054494645, |
|
"grad_norm": 0.4189177736950793, |
|
"learning_rate": 3.7700673226307616e-05, |
|
"loss": 0.3586, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.2897065673032138, |
|
"grad_norm": 0.49544734897634235, |
|
"learning_rate": 3.763593992749871e-05, |
|
"loss": 0.3584, |
|
"step": 1385 |
|
}, |
|
{ |
|
"epoch": 1.2943642291569633, |
|
"grad_norm": 0.7339020852038594, |
|
"learning_rate": 3.7571206628689795e-05, |
|
"loss": 0.3671, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 1.2990218910107125, |
|
"grad_norm": 0.47812102647655946, |
|
"learning_rate": 3.7506473329880895e-05, |
|
"loss": 0.3709, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 1.303679552864462, |
|
"grad_norm": 0.4842166642543789, |
|
"learning_rate": 3.744174003107199e-05, |
|
"loss": 0.3645, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.3083372147182115, |
|
"grad_norm": 0.5461761326327322, |
|
"learning_rate": 3.7377006732263074e-05, |
|
"loss": 0.3582, |
|
"step": 1405 |
|
}, |
|
{ |
|
"epoch": 1.312994876571961, |
|
"grad_norm": 0.3782413550156958, |
|
"learning_rate": 3.731227343345417e-05, |
|
"loss": 0.357, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 1.3176525384257103, |
|
"grad_norm": 0.5206824458146679, |
|
"learning_rate": 3.724754013464527e-05, |
|
"loss": 0.376, |
|
"step": 1415 |
|
}, |
|
{ |
|
"epoch": 1.3223102002794598, |
|
"grad_norm": 0.46613034806590914, |
|
"learning_rate": 3.718280683583635e-05, |
|
"loss": 0.3621, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.326967862133209, |
|
"grad_norm": 0.4076821120515726, |
|
"learning_rate": 3.7118073537027446e-05, |
|
"loss": 0.3603, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 1.3316255239869585, |
|
"grad_norm": 0.574827732477907, |
|
"learning_rate": 3.705334023821854e-05, |
|
"loss": 0.3577, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 1.336283185840708, |
|
"grad_norm": 0.4990608353362272, |
|
"learning_rate": 3.698860693940964e-05, |
|
"loss": 0.3621, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 1.3409408476944573, |
|
"grad_norm": 0.4258017041885792, |
|
"learning_rate": 3.6923873640600725e-05, |
|
"loss": 0.3612, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.3455985095482068, |
|
"grad_norm": 0.5518768048000128, |
|
"learning_rate": 3.685914034179182e-05, |
|
"loss": 0.3679, |
|
"step": 1445 |
|
}, |
|
{ |
|
"epoch": 1.350256171401956, |
|
"grad_norm": 0.4451614301869116, |
|
"learning_rate": 3.679440704298292e-05, |
|
"loss": 0.358, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.3549138332557056, |
|
"grad_norm": 0.512257774552283, |
|
"learning_rate": 3.6729673744174004e-05, |
|
"loss": 0.3676, |
|
"step": 1455 |
|
}, |
|
{ |
|
"epoch": 1.359571495109455, |
|
"grad_norm": 0.5162438863691285, |
|
"learning_rate": 3.66649404453651e-05, |
|
"loss": 0.3519, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.3642291569632046, |
|
"grad_norm": 0.471777012702105, |
|
"learning_rate": 3.660020714655619e-05, |
|
"loss": 0.356, |
|
"step": 1465 |
|
}, |
|
{ |
|
"epoch": 1.3688868188169538, |
|
"grad_norm": 0.48449733457712785, |
|
"learning_rate": 3.653547384774728e-05, |
|
"loss": 0.3623, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.3735444806707033, |
|
"grad_norm": 0.4576323028052415, |
|
"learning_rate": 3.6470740548938376e-05, |
|
"loss": 0.3535, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 1.3782021425244526, |
|
"grad_norm": 0.5006209765273532, |
|
"learning_rate": 3.640600725012947e-05, |
|
"loss": 0.3558, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 1.382859804378202, |
|
"grad_norm": 0.4418094810372605, |
|
"learning_rate": 3.634127395132056e-05, |
|
"loss": 0.3647, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 1.3875174662319516, |
|
"grad_norm": 0.5278545308450533, |
|
"learning_rate": 3.6276540652511655e-05, |
|
"loss": 0.3663, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 1.392175128085701, |
|
"grad_norm": 0.8122089772240753, |
|
"learning_rate": 3.621180735370275e-05, |
|
"loss": 0.365, |
|
"step": 1495 |
|
}, |
|
{ |
|
"epoch": 1.3968327899394504, |
|
"grad_norm": 0.4991551907714853, |
|
"learning_rate": 3.6147074054893834e-05, |
|
"loss": 0.3558, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.4014904517931999, |
|
"grad_norm": 0.45673339355259496, |
|
"learning_rate": 3.6082340756084934e-05, |
|
"loss": 0.3601, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 1.4061481136469491, |
|
"grad_norm": 0.45172383447115166, |
|
"learning_rate": 3.601760745727603e-05, |
|
"loss": 0.3577, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 1.4108057755006986, |
|
"grad_norm": 0.5433134841533829, |
|
"learning_rate": 3.595287415846711e-05, |
|
"loss": 0.3676, |
|
"step": 1515 |
|
}, |
|
{ |
|
"epoch": 1.4154634373544481, |
|
"grad_norm": 0.48998833758767707, |
|
"learning_rate": 3.5888140859658206e-05, |
|
"loss": 0.3606, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.4201210992081974, |
|
"grad_norm": 0.6104280836329665, |
|
"learning_rate": 3.5823407560849306e-05, |
|
"loss": 0.3617, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 1.424778761061947, |
|
"grad_norm": 1.3807202434190187, |
|
"learning_rate": 3.57586742620404e-05, |
|
"loss": 0.3511, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 1.4294364229156964, |
|
"grad_norm": 0.5813011933063881, |
|
"learning_rate": 3.5693940963231485e-05, |
|
"loss": 0.3668, |
|
"step": 1535 |
|
}, |
|
{ |
|
"epoch": 1.4340940847694457, |
|
"grad_norm": 0.4086292338502313, |
|
"learning_rate": 3.5629207664422585e-05, |
|
"loss": 0.3574, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.4387517466231952, |
|
"grad_norm": 0.45242586909239896, |
|
"learning_rate": 3.556447436561368e-05, |
|
"loss": 0.3579, |
|
"step": 1545 |
|
}, |
|
{ |
|
"epoch": 1.4434094084769447, |
|
"grad_norm": 0.42955196340267765, |
|
"learning_rate": 3.5499741066804764e-05, |
|
"loss": 0.3551, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.448067070330694, |
|
"grad_norm": 0.4355035350563131, |
|
"learning_rate": 3.543500776799586e-05, |
|
"loss": 0.3501, |
|
"step": 1555 |
|
}, |
|
{ |
|
"epoch": 1.4527247321844434, |
|
"grad_norm": 0.42304027877574124, |
|
"learning_rate": 3.537027446918696e-05, |
|
"loss": 0.3561, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.4573823940381927, |
|
"grad_norm": 0.3943828294567786, |
|
"learning_rate": 3.530554117037804e-05, |
|
"loss": 0.3631, |
|
"step": 1565 |
|
}, |
|
{ |
|
"epoch": 1.4620400558919422, |
|
"grad_norm": 0.4544956891841179, |
|
"learning_rate": 3.5240807871569136e-05, |
|
"loss": 0.376, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.4666977177456917, |
|
"grad_norm": 0.555764704348166, |
|
"learning_rate": 3.517607457276023e-05, |
|
"loss": 0.3584, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 1.4713553795994412, |
|
"grad_norm": 0.6196585992504084, |
|
"learning_rate": 3.511134127395132e-05, |
|
"loss": 0.3514, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.4760130414531905, |
|
"grad_norm": 0.5330586804823, |
|
"learning_rate": 3.5046607975142415e-05, |
|
"loss": 0.3576, |
|
"step": 1585 |
|
}, |
|
{ |
|
"epoch": 1.48067070330694, |
|
"grad_norm": 0.5553874975958871, |
|
"learning_rate": 3.498187467633351e-05, |
|
"loss": 0.3596, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.4853283651606892, |
|
"grad_norm": 0.45769356056194815, |
|
"learning_rate": 3.49171413775246e-05, |
|
"loss": 0.3557, |
|
"step": 1595 |
|
}, |
|
{ |
|
"epoch": 1.4899860270144387, |
|
"grad_norm": 0.5584387615516803, |
|
"learning_rate": 3.4852408078715694e-05, |
|
"loss": 0.3613, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.4946436888681882, |
|
"grad_norm": 0.43343277775624545, |
|
"learning_rate": 3.478767477990679e-05, |
|
"loss": 0.3524, |
|
"step": 1605 |
|
}, |
|
{ |
|
"epoch": 1.4993013507219377, |
|
"grad_norm": 0.4377140537194227, |
|
"learning_rate": 3.472294148109787e-05, |
|
"loss": 0.3607, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 1.503959012575687, |
|
"grad_norm": 0.5053071933612086, |
|
"learning_rate": 3.465820818228897e-05, |
|
"loss": 0.3582, |
|
"step": 1615 |
|
}, |
|
{ |
|
"epoch": 1.5086166744294365, |
|
"grad_norm": 0.47265497816294294, |
|
"learning_rate": 3.4593474883480066e-05, |
|
"loss": 0.359, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.5132743362831858, |
|
"grad_norm": 0.4617635092436261, |
|
"learning_rate": 3.452874158467116e-05, |
|
"loss": 0.3626, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 1.5179319981369352, |
|
"grad_norm": 0.5305726325688056, |
|
"learning_rate": 3.4464008285862245e-05, |
|
"loss": 0.3596, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 1.5225896599906847, |
|
"grad_norm": 0.4457785432446353, |
|
"learning_rate": 3.4399274987053345e-05, |
|
"loss": 0.3722, |
|
"step": 1635 |
|
}, |
|
{ |
|
"epoch": 1.5272473218444342, |
|
"grad_norm": 0.4196162887632958, |
|
"learning_rate": 3.433454168824444e-05, |
|
"loss": 0.365, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.5319049836981835, |
|
"grad_norm": 0.4499181015182438, |
|
"learning_rate": 3.4269808389435524e-05, |
|
"loss": 0.3665, |
|
"step": 1645 |
|
}, |
|
{ |
|
"epoch": 1.5365626455519328, |
|
"grad_norm": 0.41777827066281287, |
|
"learning_rate": 3.4205075090626624e-05, |
|
"loss": 0.3511, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.5412203074056823, |
|
"grad_norm": 0.483686609320562, |
|
"learning_rate": 3.414034179181772e-05, |
|
"loss": 0.3513, |
|
"step": 1655 |
|
}, |
|
{ |
|
"epoch": 1.5458779692594318, |
|
"grad_norm": 0.42024302687692083, |
|
"learning_rate": 3.40756084930088e-05, |
|
"loss": 0.3502, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 1.5505356311131813, |
|
"grad_norm": 0.6580744072852233, |
|
"learning_rate": 3.4010875194199896e-05, |
|
"loss": 0.3629, |
|
"step": 1665 |
|
}, |
|
{ |
|
"epoch": 1.5551932929669308, |
|
"grad_norm": 0.4143100503219979, |
|
"learning_rate": 3.3946141895390996e-05, |
|
"loss": 0.3583, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 1.55985095482068, |
|
"grad_norm": 0.45443895548063523, |
|
"learning_rate": 3.388140859658208e-05, |
|
"loss": 0.3569, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 1.5645086166744293, |
|
"grad_norm": 0.42295760226587914, |
|
"learning_rate": 3.3816675297773175e-05, |
|
"loss": 0.3635, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 1.5691662785281788, |
|
"grad_norm": 0.5110004227388002, |
|
"learning_rate": 3.375194199896427e-05, |
|
"loss": 0.3533, |
|
"step": 1685 |
|
}, |
|
{ |
|
"epoch": 1.5738239403819283, |
|
"grad_norm": 0.42320283733825365, |
|
"learning_rate": 3.368720870015536e-05, |
|
"loss": 0.3597, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 1.5784816022356778, |
|
"grad_norm": 0.41482962057863415, |
|
"learning_rate": 3.3622475401346454e-05, |
|
"loss": 0.358, |
|
"step": 1695 |
|
}, |
|
{ |
|
"epoch": 1.583139264089427, |
|
"grad_norm": 0.4150667378311711, |
|
"learning_rate": 3.355774210253755e-05, |
|
"loss": 0.3632, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.5877969259431766, |
|
"grad_norm": 0.3666610812445483, |
|
"learning_rate": 3.349300880372864e-05, |
|
"loss": 0.3552, |
|
"step": 1705 |
|
}, |
|
{ |
|
"epoch": 1.5924545877969258, |
|
"grad_norm": 0.4530536221435045, |
|
"learning_rate": 3.342827550491973e-05, |
|
"loss": 0.363, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.5971122496506753, |
|
"grad_norm": 0.4941170831154173, |
|
"learning_rate": 3.3363542206110826e-05, |
|
"loss": 0.3631, |
|
"step": 1715 |
|
}, |
|
{ |
|
"epoch": 1.6017699115044248, |
|
"grad_norm": 0.4707664848927128, |
|
"learning_rate": 3.329880890730191e-05, |
|
"loss": 0.35, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 1.6064275733581743, |
|
"grad_norm": 0.46158780862776105, |
|
"learning_rate": 3.323407560849301e-05, |
|
"loss": 0.3562, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 1.6110852352119236, |
|
"grad_norm": 0.41962064552472267, |
|
"learning_rate": 3.3169342309684105e-05, |
|
"loss": 0.3674, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 1.6157428970656729, |
|
"grad_norm": 0.45419850851419985, |
|
"learning_rate": 3.31046090108752e-05, |
|
"loss": 0.3683, |
|
"step": 1735 |
|
}, |
|
{ |
|
"epoch": 1.6204005589194224, |
|
"grad_norm": 0.46971165862694936, |
|
"learning_rate": 3.3039875712066284e-05, |
|
"loss": 0.3478, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 1.6250582207731719, |
|
"grad_norm": 0.47443096230607346, |
|
"learning_rate": 3.2975142413257384e-05, |
|
"loss": 0.3772, |
|
"step": 1745 |
|
}, |
|
{ |
|
"epoch": 1.6297158826269214, |
|
"grad_norm": 0.37835298699147984, |
|
"learning_rate": 3.291040911444848e-05, |
|
"loss": 0.3578, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.6343735444806708, |
|
"grad_norm": 0.4582780296219518, |
|
"learning_rate": 3.284567581563956e-05, |
|
"loss": 0.3664, |
|
"step": 1755 |
|
}, |
|
{ |
|
"epoch": 1.6390312063344201, |
|
"grad_norm": 0.4354429562476338, |
|
"learning_rate": 3.278094251683066e-05, |
|
"loss": 0.3534, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 1.6436888681881694, |
|
"grad_norm": 0.5789259252606435, |
|
"learning_rate": 3.2716209218021756e-05, |
|
"loss": 0.356, |
|
"step": 1765 |
|
}, |
|
{ |
|
"epoch": 1.648346530041919, |
|
"grad_norm": 0.464453568996508, |
|
"learning_rate": 3.265147591921284e-05, |
|
"loss": 0.3503, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 1.6530041918956684, |
|
"grad_norm": 0.4568793337848766, |
|
"learning_rate": 3.2586742620403935e-05, |
|
"loss": 0.3654, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 1.6576618537494179, |
|
"grad_norm": 0.4717572823402282, |
|
"learning_rate": 3.2522009321595035e-05, |
|
"loss": 0.3542, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 1.6623195156031674, |
|
"grad_norm": 0.4567156289592614, |
|
"learning_rate": 3.245727602278612e-05, |
|
"loss": 0.3629, |
|
"step": 1785 |
|
}, |
|
{ |
|
"epoch": 1.6669771774569166, |
|
"grad_norm": 0.4208675681151171, |
|
"learning_rate": 3.2392542723977214e-05, |
|
"loss": 0.3527, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 1.671634839310666, |
|
"grad_norm": 0.5044454939254893, |
|
"learning_rate": 3.232780942516831e-05, |
|
"loss": 0.3525, |
|
"step": 1795 |
|
}, |
|
{ |
|
"epoch": 1.6762925011644154, |
|
"grad_norm": 0.42387939427190896, |
|
"learning_rate": 3.22630761263594e-05, |
|
"loss": 0.3506, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.680950163018165, |
|
"grad_norm": 0.5023473389380669, |
|
"learning_rate": 3.219834282755049e-05, |
|
"loss": 0.3579, |
|
"step": 1805 |
|
}, |
|
{ |
|
"epoch": 1.6856078248719144, |
|
"grad_norm": 0.47206054679476617, |
|
"learning_rate": 3.2133609528741586e-05, |
|
"loss": 0.3564, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 1.6902654867256637, |
|
"grad_norm": 0.4607885248484138, |
|
"learning_rate": 3.206887622993268e-05, |
|
"loss": 0.3589, |
|
"step": 1815 |
|
}, |
|
{ |
|
"epoch": 1.6949231485794132, |
|
"grad_norm": 0.35782128979620814, |
|
"learning_rate": 3.200414293112377e-05, |
|
"loss": 0.3454, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 1.6995808104331624, |
|
"grad_norm": 0.44305913674826386, |
|
"learning_rate": 3.1939409632314865e-05, |
|
"loss": 0.3488, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 1.704238472286912, |
|
"grad_norm": 0.38935978775442764, |
|
"learning_rate": 3.187467633350596e-05, |
|
"loss": 0.3532, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 1.7088961341406614, |
|
"grad_norm": 0.40971733347800393, |
|
"learning_rate": 3.180994303469705e-05, |
|
"loss": 0.3606, |
|
"step": 1835 |
|
}, |
|
{ |
|
"epoch": 1.713553795994411, |
|
"grad_norm": 0.44109214451792117, |
|
"learning_rate": 3.1745209735888144e-05, |
|
"loss": 0.3547, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 1.7182114578481602, |
|
"grad_norm": 0.40169684579135284, |
|
"learning_rate": 3.168047643707924e-05, |
|
"loss": 0.3596, |
|
"step": 1845 |
|
}, |
|
{ |
|
"epoch": 1.7228691197019095, |
|
"grad_norm": 0.3962129300962754, |
|
"learning_rate": 3.161574313827032e-05, |
|
"loss": 0.3668, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.727526781555659, |
|
"grad_norm": 0.42734260387858874, |
|
"learning_rate": 3.155100983946142e-05, |
|
"loss": 0.348, |
|
"step": 1855 |
|
}, |
|
{ |
|
"epoch": 1.7321844434094085, |
|
"grad_norm": 0.6659934996226169, |
|
"learning_rate": 3.1486276540652516e-05, |
|
"loss": 0.3716, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.736842105263158, |
|
"grad_norm": 0.5555095008887161, |
|
"learning_rate": 3.14215432418436e-05, |
|
"loss": 0.3524, |
|
"step": 1865 |
|
}, |
|
{ |
|
"epoch": 1.7414997671169075, |
|
"grad_norm": 0.3709679427144339, |
|
"learning_rate": 3.13568099430347e-05, |
|
"loss": 0.3547, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 1.7461574289706567, |
|
"grad_norm": 0.4145921631194794, |
|
"learning_rate": 3.1292076644225795e-05, |
|
"loss": 0.3712, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 1.750815090824406, |
|
"grad_norm": 0.4814783579328554, |
|
"learning_rate": 3.122734334541688e-05, |
|
"loss": 0.3613, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 1.7554727526781555, |
|
"grad_norm": 0.49050194254390594, |
|
"learning_rate": 3.1162610046607974e-05, |
|
"loss": 0.3635, |
|
"step": 1885 |
|
}, |
|
{ |
|
"epoch": 1.760130414531905, |
|
"grad_norm": 0.4333631796725332, |
|
"learning_rate": 3.1097876747799074e-05, |
|
"loss": 0.3605, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 1.7647880763856545, |
|
"grad_norm": 0.4868988400346533, |
|
"learning_rate": 3.103314344899016e-05, |
|
"loss": 0.3689, |
|
"step": 1895 |
|
}, |
|
{ |
|
"epoch": 1.7694457382394038, |
|
"grad_norm": 0.4001143060111157, |
|
"learning_rate": 3.096841015018125e-05, |
|
"loss": 0.3592, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.7741034000931533, |
|
"grad_norm": 0.45824721223774795, |
|
"learning_rate": 3.0903676851372346e-05, |
|
"loss": 0.3582, |
|
"step": 1905 |
|
}, |
|
{ |
|
"epoch": 1.7787610619469025, |
|
"grad_norm": 0.5424585883204478, |
|
"learning_rate": 3.083894355256344e-05, |
|
"loss": 0.3534, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.783418723800652, |
|
"grad_norm": 0.5014177386049438, |
|
"learning_rate": 3.077421025375453e-05, |
|
"loss": 0.3482, |
|
"step": 1915 |
|
}, |
|
{ |
|
"epoch": 1.7880763856544015, |
|
"grad_norm": 0.4851314989650092, |
|
"learning_rate": 3.0709476954945625e-05, |
|
"loss": 0.3514, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.792734047508151, |
|
"grad_norm": 0.42176578290218586, |
|
"learning_rate": 3.064474365613672e-05, |
|
"loss": 0.3623, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 1.7973917093619003, |
|
"grad_norm": 0.49197555336823995, |
|
"learning_rate": 3.058001035732781e-05, |
|
"loss": 0.3517, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 1.8020493712156498, |
|
"grad_norm": 0.5114907876049536, |
|
"learning_rate": 3.0515277058518904e-05, |
|
"loss": 0.3471, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 1.806707033069399, |
|
"grad_norm": 0.4579020379986667, |
|
"learning_rate": 3.045054375971e-05, |
|
"loss": 0.3499, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 1.8113646949231486, |
|
"grad_norm": 0.4162796744779169, |
|
"learning_rate": 3.0385810460901086e-05, |
|
"loss": 0.3602, |
|
"step": 1945 |
|
}, |
|
{ |
|
"epoch": 1.816022356776898, |
|
"grad_norm": 0.43293484925432135, |
|
"learning_rate": 3.0321077162092183e-05, |
|
"loss": 0.3516, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.8206800186306475, |
|
"grad_norm": 0.38712551527265004, |
|
"learning_rate": 3.0256343863283276e-05, |
|
"loss": 0.3553, |
|
"step": 1955 |
|
}, |
|
{ |
|
"epoch": 1.8253376804843968, |
|
"grad_norm": 0.5392482471776439, |
|
"learning_rate": 3.0191610564474365e-05, |
|
"loss": 0.3565, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 1.829995342338146, |
|
"grad_norm": 0.3943059237946394, |
|
"learning_rate": 3.0126877265665458e-05, |
|
"loss": 0.3593, |
|
"step": 1965 |
|
}, |
|
{ |
|
"epoch": 1.8346530041918956, |
|
"grad_norm": 0.4256479628934368, |
|
"learning_rate": 3.0062143966856555e-05, |
|
"loss": 0.3536, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 1.839310666045645, |
|
"grad_norm": 0.5122761549068479, |
|
"learning_rate": 2.9997410668047644e-05, |
|
"loss": 0.3589, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 1.8439683278993946, |
|
"grad_norm": 0.370961688185249, |
|
"learning_rate": 2.9932677369238737e-05, |
|
"loss": 0.3503, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 1.848625989753144, |
|
"grad_norm": 0.4063678003465277, |
|
"learning_rate": 2.9867944070429834e-05, |
|
"loss": 0.3649, |
|
"step": 1985 |
|
}, |
|
{ |
|
"epoch": 1.8532836516068933, |
|
"grad_norm": 0.5043143808118292, |
|
"learning_rate": 2.980321077162092e-05, |
|
"loss": 0.3597, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 1.8579413134606426, |
|
"grad_norm": 0.3888766687196224, |
|
"learning_rate": 2.9738477472812016e-05, |
|
"loss": 0.3478, |
|
"step": 1995 |
|
}, |
|
{ |
|
"epoch": 1.8625989753143921, |
|
"grad_norm": 0.3653540581805115, |
|
"learning_rate": 2.967374417400311e-05, |
|
"loss": 0.3563, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.8672566371681416, |
|
"grad_norm": 0.42346683909033195, |
|
"learning_rate": 2.96090108751942e-05, |
|
"loss": 0.3626, |
|
"step": 2005 |
|
}, |
|
{ |
|
"epoch": 1.871914299021891, |
|
"grad_norm": 0.37053462856669767, |
|
"learning_rate": 2.9544277576385292e-05, |
|
"loss": 0.3561, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 1.8765719608756404, |
|
"grad_norm": 0.5423835623045288, |
|
"learning_rate": 2.9479544277576388e-05, |
|
"loss": 0.368, |
|
"step": 2015 |
|
}, |
|
{ |
|
"epoch": 1.8812296227293899, |
|
"grad_norm": 0.4430785063248816, |
|
"learning_rate": 2.941481097876748e-05, |
|
"loss": 0.3532, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 1.8858872845831391, |
|
"grad_norm": 0.5390136431233741, |
|
"learning_rate": 2.935007767995857e-05, |
|
"loss": 0.3568, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 1.8905449464368886, |
|
"grad_norm": 0.42451261658331807, |
|
"learning_rate": 2.9285344381149664e-05, |
|
"loss": 0.3546, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 1.8952026082906381, |
|
"grad_norm": 0.4704880484210268, |
|
"learning_rate": 2.922061108234076e-05, |
|
"loss": 0.3528, |
|
"step": 2035 |
|
}, |
|
{ |
|
"epoch": 1.8998602701443876, |
|
"grad_norm": 10.0374479730041, |
|
"learning_rate": 2.915587778353185e-05, |
|
"loss": 0.3662, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 1.904517931998137, |
|
"grad_norm": 0.44064361841438693, |
|
"learning_rate": 2.9091144484722943e-05, |
|
"loss": 0.3639, |
|
"step": 2045 |
|
}, |
|
{ |
|
"epoch": 1.9091755938518864, |
|
"grad_norm": 0.6176856815800884, |
|
"learning_rate": 2.902641118591404e-05, |
|
"loss": 0.3606, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.9138332557056357, |
|
"grad_norm": 0.5399754968576305, |
|
"learning_rate": 2.8961677887105125e-05, |
|
"loss": 0.3578, |
|
"step": 2055 |
|
}, |
|
{ |
|
"epoch": 1.9184909175593852, |
|
"grad_norm": 0.4645100850311685, |
|
"learning_rate": 2.889694458829622e-05, |
|
"loss": 0.3533, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 1.9231485794131347, |
|
"grad_norm": 0.5042470599105712, |
|
"learning_rate": 2.8832211289487315e-05, |
|
"loss": 0.356, |
|
"step": 2065 |
|
}, |
|
{ |
|
"epoch": 1.9278062412668842, |
|
"grad_norm": 0.4839587513961929, |
|
"learning_rate": 2.8767477990678404e-05, |
|
"loss": 0.3584, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 1.9324639031206334, |
|
"grad_norm": 0.5205667060014961, |
|
"learning_rate": 2.8702744691869497e-05, |
|
"loss": 0.3568, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 1.9371215649743827, |
|
"grad_norm": 0.5177813037309902, |
|
"learning_rate": 2.8638011393060594e-05, |
|
"loss": 0.3505, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 1.9417792268281322, |
|
"grad_norm": 0.4545545367610249, |
|
"learning_rate": 2.8573278094251683e-05, |
|
"loss": 0.3608, |
|
"step": 2085 |
|
}, |
|
{ |
|
"epoch": 1.9464368886818817, |
|
"grad_norm": 0.41189147778170226, |
|
"learning_rate": 2.8508544795442776e-05, |
|
"loss": 0.3452, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 1.9510945505356312, |
|
"grad_norm": 0.4202290799914547, |
|
"learning_rate": 2.8443811496633873e-05, |
|
"loss": 0.3514, |
|
"step": 2095 |
|
}, |
|
{ |
|
"epoch": 1.9557522123893807, |
|
"grad_norm": 0.4377924322321175, |
|
"learning_rate": 2.837907819782496e-05, |
|
"loss": 0.3542, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.96040987424313, |
|
"grad_norm": 0.5132414811078034, |
|
"learning_rate": 2.8314344899016055e-05, |
|
"loss": 0.3611, |
|
"step": 2105 |
|
}, |
|
{ |
|
"epoch": 1.9650675360968792, |
|
"grad_norm": 0.4929000956532224, |
|
"learning_rate": 2.8249611600207148e-05, |
|
"loss": 0.3487, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 1.9697251979506287, |
|
"grad_norm": 0.49429590172686955, |
|
"learning_rate": 2.8184878301398244e-05, |
|
"loss": 0.3548, |
|
"step": 2115 |
|
}, |
|
{ |
|
"epoch": 1.9743828598043782, |
|
"grad_norm": 0.45457503823460055, |
|
"learning_rate": 2.812014500258933e-05, |
|
"loss": 0.3566, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 1.9790405216581277, |
|
"grad_norm": 0.3816140696343873, |
|
"learning_rate": 2.8055411703780427e-05, |
|
"loss": 0.3517, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 1.983698183511877, |
|
"grad_norm": 0.6242736771442058, |
|
"learning_rate": 2.799067840497152e-05, |
|
"loss": 0.3543, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 1.9883558453656265, |
|
"grad_norm": 0.4277411480118539, |
|
"learning_rate": 2.792594510616261e-05, |
|
"loss": 0.3592, |
|
"step": 2135 |
|
}, |
|
{ |
|
"epoch": 1.9930135072193758, |
|
"grad_norm": 0.5159703918534981, |
|
"learning_rate": 2.7861211807353703e-05, |
|
"loss": 0.3579, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 1.9976711690731253, |
|
"grad_norm": 0.4202566030642709, |
|
"learning_rate": 2.77964785085448e-05, |
|
"loss": 0.3585, |
|
"step": 2145 |
|
}, |
|
{ |
|
"epoch": 2.0018630647415, |
|
"grad_norm": 0.5609402586843426, |
|
"learning_rate": 2.773174520973589e-05, |
|
"loss": 0.3298, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 2.0065207265952494, |
|
"grad_norm": 0.425941696873025, |
|
"learning_rate": 2.766701191092698e-05, |
|
"loss": 0.2798, |
|
"step": 2155 |
|
}, |
|
{ |
|
"epoch": 2.0111783884489984, |
|
"grad_norm": 0.3916657744615872, |
|
"learning_rate": 2.7602278612118078e-05, |
|
"loss": 0.2704, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 2.015836050302748, |
|
"grad_norm": 0.43810490702164956, |
|
"learning_rate": 2.7537545313309164e-05, |
|
"loss": 0.2683, |
|
"step": 2165 |
|
}, |
|
{ |
|
"epoch": 2.0204937121564974, |
|
"grad_norm": 0.42179753955288946, |
|
"learning_rate": 2.747281201450026e-05, |
|
"loss": 0.2647, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 2.025151374010247, |
|
"grad_norm": 0.41065922439891167, |
|
"learning_rate": 2.7408078715691354e-05, |
|
"loss": 0.2722, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 2.0298090358639964, |
|
"grad_norm": 1.1811479608004933, |
|
"learning_rate": 2.7343345416882443e-05, |
|
"loss": 0.2658, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 2.034466697717746, |
|
"grad_norm": 0.3840477047643718, |
|
"learning_rate": 2.7278612118073536e-05, |
|
"loss": 0.2684, |
|
"step": 2185 |
|
}, |
|
{ |
|
"epoch": 2.039124359571495, |
|
"grad_norm": 0.45197057438747157, |
|
"learning_rate": 2.7213878819264633e-05, |
|
"loss": 0.2725, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 2.0437820214252445, |
|
"grad_norm": 0.36416091004272566, |
|
"learning_rate": 2.7149145520455722e-05, |
|
"loss": 0.268, |
|
"step": 2195 |
|
}, |
|
{ |
|
"epoch": 2.048439683278994, |
|
"grad_norm": 0.43811123788548967, |
|
"learning_rate": 2.7084412221646815e-05, |
|
"loss": 0.269, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.0530973451327434, |
|
"grad_norm": 0.45910013565579505, |
|
"learning_rate": 2.701967892283791e-05, |
|
"loss": 0.2736, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 2.057755006986493, |
|
"grad_norm": 0.4013422169686014, |
|
"learning_rate": 2.6954945624029004e-05, |
|
"loss": 0.2642, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 2.062412668840242, |
|
"grad_norm": 0.4295693677688835, |
|
"learning_rate": 2.6890212325220094e-05, |
|
"loss": 0.2705, |
|
"step": 2215 |
|
}, |
|
{ |
|
"epoch": 2.0670703306939915, |
|
"grad_norm": 0.3370168623171349, |
|
"learning_rate": 2.6825479026411187e-05, |
|
"loss": 0.2677, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 2.071727992547741, |
|
"grad_norm": 0.40197526047576093, |
|
"learning_rate": 2.6760745727602283e-05, |
|
"loss": 0.2668, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 2.0763856544014905, |
|
"grad_norm": 0.45672730531903405, |
|
"learning_rate": 2.669601242879337e-05, |
|
"loss": 0.2701, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 2.08104331625524, |
|
"grad_norm": 0.39868426985110295, |
|
"learning_rate": 2.6631279129984466e-05, |
|
"loss": 0.2648, |
|
"step": 2235 |
|
}, |
|
{ |
|
"epoch": 2.0857009781089895, |
|
"grad_norm": 0.36305570920930336, |
|
"learning_rate": 2.656654583117556e-05, |
|
"loss": 0.2761, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 2.0903586399627385, |
|
"grad_norm": 0.4023887365515303, |
|
"learning_rate": 2.650181253236665e-05, |
|
"loss": 0.271, |
|
"step": 2245 |
|
}, |
|
{ |
|
"epoch": 2.095016301816488, |
|
"grad_norm": 0.3716267176543075, |
|
"learning_rate": 2.643707923355774e-05, |
|
"loss": 0.2667, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 2.0996739636702375, |
|
"grad_norm": 0.4200410072696504, |
|
"learning_rate": 2.6372345934748838e-05, |
|
"loss": 0.2762, |
|
"step": 2255 |
|
}, |
|
{ |
|
"epoch": 2.104331625523987, |
|
"grad_norm": 0.3687755529593597, |
|
"learning_rate": 2.6307612635939928e-05, |
|
"loss": 0.2655, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 2.1089892873777365, |
|
"grad_norm": 0.32809338851619163, |
|
"learning_rate": 2.624287933713102e-05, |
|
"loss": 0.2713, |
|
"step": 2265 |
|
}, |
|
{ |
|
"epoch": 2.113646949231486, |
|
"grad_norm": 0.3409360403110876, |
|
"learning_rate": 2.6178146038322117e-05, |
|
"loss": 0.2685, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 2.118304611085235, |
|
"grad_norm": 0.7982011287578665, |
|
"learning_rate": 2.6113412739513203e-05, |
|
"loss": 0.2746, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 2.1229622729389845, |
|
"grad_norm": 0.3617141903798149, |
|
"learning_rate": 2.60486794407043e-05, |
|
"loss": 0.2715, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 2.127619934792734, |
|
"grad_norm": 0.3727969530757095, |
|
"learning_rate": 2.5983946141895393e-05, |
|
"loss": 0.2738, |
|
"step": 2285 |
|
}, |
|
{ |
|
"epoch": 2.1322775966464835, |
|
"grad_norm": 0.40537658839373925, |
|
"learning_rate": 2.5919212843086482e-05, |
|
"loss": 0.2713, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 2.136935258500233, |
|
"grad_norm": 0.4592856921511742, |
|
"learning_rate": 2.5854479544277575e-05, |
|
"loss": 0.2786, |
|
"step": 2295 |
|
}, |
|
{ |
|
"epoch": 2.1415929203539825, |
|
"grad_norm": 0.3602958767186378, |
|
"learning_rate": 2.578974624546867e-05, |
|
"loss": 0.2763, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 2.1462505822077316, |
|
"grad_norm": 0.36531905883050036, |
|
"learning_rate": 2.5725012946659764e-05, |
|
"loss": 0.2732, |
|
"step": 2305 |
|
}, |
|
{ |
|
"epoch": 2.150908244061481, |
|
"grad_norm": 0.3623277574575608, |
|
"learning_rate": 2.5660279647850854e-05, |
|
"loss": 0.2643, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 2.1555659059152306, |
|
"grad_norm": 0.3884139128194696, |
|
"learning_rate": 2.559554634904195e-05, |
|
"loss": 0.2693, |
|
"step": 2315 |
|
}, |
|
{ |
|
"epoch": 2.16022356776898, |
|
"grad_norm": 0.3919757281405461, |
|
"learning_rate": 2.5530813050233043e-05, |
|
"loss": 0.2656, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 2.1648812296227296, |
|
"grad_norm": 0.4016863934977521, |
|
"learning_rate": 2.5466079751424133e-05, |
|
"loss": 0.2742, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 2.1695388914764786, |
|
"grad_norm": 0.31887836235970607, |
|
"learning_rate": 2.5401346452615226e-05, |
|
"loss": 0.2737, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 2.174196553330228, |
|
"grad_norm": 0.40350840902874313, |
|
"learning_rate": 2.5336613153806322e-05, |
|
"loss": 0.2662, |
|
"step": 2335 |
|
}, |
|
{ |
|
"epoch": 2.1788542151839776, |
|
"grad_norm": 0.4135565972212336, |
|
"learning_rate": 2.527187985499741e-05, |
|
"loss": 0.2717, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 2.183511877037727, |
|
"grad_norm": 0.31536031277914545, |
|
"learning_rate": 2.5207146556188505e-05, |
|
"loss": 0.2772, |
|
"step": 2345 |
|
}, |
|
{ |
|
"epoch": 2.1881695388914766, |
|
"grad_norm": 0.34236501501838795, |
|
"learning_rate": 2.5142413257379598e-05, |
|
"loss": 0.271, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 2.192827200745226, |
|
"grad_norm": 0.317656594570138, |
|
"learning_rate": 2.5077679958570688e-05, |
|
"loss": 0.2682, |
|
"step": 2355 |
|
}, |
|
{ |
|
"epoch": 2.197484862598975, |
|
"grad_norm": 0.3661371683010254, |
|
"learning_rate": 2.5012946659761784e-05, |
|
"loss": 0.2755, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 2.2021425244527246, |
|
"grad_norm": 0.41181534702112593, |
|
"learning_rate": 2.4948213360952874e-05, |
|
"loss": 0.2738, |
|
"step": 2365 |
|
}, |
|
{ |
|
"epoch": 2.206800186306474, |
|
"grad_norm": 0.3653865073116719, |
|
"learning_rate": 2.488348006214397e-05, |
|
"loss": 0.2736, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 2.2114578481602236, |
|
"grad_norm": 0.39203577537195466, |
|
"learning_rate": 2.481874676333506e-05, |
|
"loss": 0.2742, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 2.216115510013973, |
|
"grad_norm": 0.3720934862670963, |
|
"learning_rate": 2.4754013464526156e-05, |
|
"loss": 0.2724, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 2.2207731718677226, |
|
"grad_norm": 0.34171876985926397, |
|
"learning_rate": 2.4689280165717246e-05, |
|
"loss": 0.2831, |
|
"step": 2385 |
|
}, |
|
{ |
|
"epoch": 2.2254308337214717, |
|
"grad_norm": 0.39944570037740157, |
|
"learning_rate": 2.462454686690834e-05, |
|
"loss": 0.2794, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 2.230088495575221, |
|
"grad_norm": 0.3550895446151112, |
|
"learning_rate": 2.455981356809943e-05, |
|
"loss": 0.2725, |
|
"step": 2395 |
|
}, |
|
{ |
|
"epoch": 2.2347461574289706, |
|
"grad_norm": 0.4970447794860612, |
|
"learning_rate": 2.4495080269290524e-05, |
|
"loss": 0.2644, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.23940381928272, |
|
"grad_norm": 0.4050207598981995, |
|
"learning_rate": 2.4430346970481614e-05, |
|
"loss": 0.2732, |
|
"step": 2405 |
|
}, |
|
{ |
|
"epoch": 2.2440614811364696, |
|
"grad_norm": 0.3707865091431949, |
|
"learning_rate": 2.436561367167271e-05, |
|
"loss": 0.2764, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 2.248719142990219, |
|
"grad_norm": 0.4063161340916692, |
|
"learning_rate": 2.4300880372863803e-05, |
|
"loss": 0.2708, |
|
"step": 2415 |
|
}, |
|
{ |
|
"epoch": 2.253376804843968, |
|
"grad_norm": 0.42036563058341386, |
|
"learning_rate": 2.4236147074054896e-05, |
|
"loss": 0.265, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 2.2580344666977177, |
|
"grad_norm": 0.3543048649400106, |
|
"learning_rate": 2.417141377524599e-05, |
|
"loss": 0.2751, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 2.262692128551467, |
|
"grad_norm": 0.3983712556686238, |
|
"learning_rate": 2.410668047643708e-05, |
|
"loss": 0.2675, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 2.2673497904052167, |
|
"grad_norm": 0.3530424561412936, |
|
"learning_rate": 2.4041947177628175e-05, |
|
"loss": 0.2775, |
|
"step": 2435 |
|
}, |
|
{ |
|
"epoch": 2.272007452258966, |
|
"grad_norm": 0.3831906451800883, |
|
"learning_rate": 2.3977213878819265e-05, |
|
"loss": 0.2718, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 2.276665114112715, |
|
"grad_norm": 0.3629826382220657, |
|
"learning_rate": 2.3912480580010358e-05, |
|
"loss": 0.2714, |
|
"step": 2445 |
|
}, |
|
{ |
|
"epoch": 2.2813227759664647, |
|
"grad_norm": 0.3723371127396783, |
|
"learning_rate": 2.384774728120145e-05, |
|
"loss": 0.2713, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 2.285980437820214, |
|
"grad_norm": 0.33116565655664815, |
|
"learning_rate": 2.3783013982392544e-05, |
|
"loss": 0.2714, |
|
"step": 2455 |
|
}, |
|
{ |
|
"epoch": 2.2906380996739637, |
|
"grad_norm": 0.4241473682280153, |
|
"learning_rate": 2.3718280683583634e-05, |
|
"loss": 0.2697, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 2.295295761527713, |
|
"grad_norm": 0.39705043895634146, |
|
"learning_rate": 2.365354738477473e-05, |
|
"loss": 0.2822, |
|
"step": 2465 |
|
}, |
|
{ |
|
"epoch": 2.2999534233814627, |
|
"grad_norm": 0.33163639660401323, |
|
"learning_rate": 2.3588814085965823e-05, |
|
"loss": 0.2718, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 2.3046110852352117, |
|
"grad_norm": 0.33512581809854164, |
|
"learning_rate": 2.3524080787156916e-05, |
|
"loss": 0.2784, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 2.3092687470889612, |
|
"grad_norm": 0.3821131258454766, |
|
"learning_rate": 2.345934748834801e-05, |
|
"loss": 0.2718, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 2.3139264089427107, |
|
"grad_norm": 0.39074319401185925, |
|
"learning_rate": 2.33946141895391e-05, |
|
"loss": 0.2783, |
|
"step": 2485 |
|
}, |
|
{ |
|
"epoch": 2.3185840707964602, |
|
"grad_norm": 0.33902978499643044, |
|
"learning_rate": 2.3329880890730195e-05, |
|
"loss": 0.2783, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 2.3232417326502097, |
|
"grad_norm": 0.3827711970500847, |
|
"learning_rate": 2.3265147591921284e-05, |
|
"loss": 0.2703, |
|
"step": 2495 |
|
}, |
|
{ |
|
"epoch": 2.3278993945039588, |
|
"grad_norm": 0.355781107832294, |
|
"learning_rate": 2.3200414293112377e-05, |
|
"loss": 0.2652, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.3325570563577083, |
|
"grad_norm": 0.33162879789458566, |
|
"learning_rate": 2.313568099430347e-05, |
|
"loss": 0.2717, |
|
"step": 2505 |
|
}, |
|
{ |
|
"epoch": 2.3372147182114578, |
|
"grad_norm": 0.4338763189528674, |
|
"learning_rate": 2.3070947695494563e-05, |
|
"loss": 0.275, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 2.3418723800652073, |
|
"grad_norm": 0.383583192216027, |
|
"learning_rate": 2.3006214396685656e-05, |
|
"loss": 0.2729, |
|
"step": 2515 |
|
}, |
|
{ |
|
"epoch": 2.3465300419189568, |
|
"grad_norm": 0.32609506468335425, |
|
"learning_rate": 2.294148109787675e-05, |
|
"loss": 0.275, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 2.3511877037727063, |
|
"grad_norm": 0.3335278023149569, |
|
"learning_rate": 2.2876747799067842e-05, |
|
"loss": 0.2755, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 2.3558453656264557, |
|
"grad_norm": 0.3350637802852781, |
|
"learning_rate": 2.2812014500258935e-05, |
|
"loss": 0.2732, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 2.360503027480205, |
|
"grad_norm": 0.37866460267486196, |
|
"learning_rate": 2.274728120145003e-05, |
|
"loss": 0.268, |
|
"step": 2535 |
|
}, |
|
{ |
|
"epoch": 2.3651606893339543, |
|
"grad_norm": 0.34555155639658347, |
|
"learning_rate": 2.2682547902641118e-05, |
|
"loss": 0.2676, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 2.369818351187704, |
|
"grad_norm": 0.3258434102060813, |
|
"learning_rate": 2.2617814603832214e-05, |
|
"loss": 0.2773, |
|
"step": 2545 |
|
}, |
|
{ |
|
"epoch": 2.3744760130414533, |
|
"grad_norm": 0.3658190762241647, |
|
"learning_rate": 2.2553081305023304e-05, |
|
"loss": 0.2778, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 2.3791336748952028, |
|
"grad_norm": 0.4542579563180514, |
|
"learning_rate": 2.2488348006214397e-05, |
|
"loss": 0.2732, |
|
"step": 2555 |
|
}, |
|
{ |
|
"epoch": 2.383791336748952, |
|
"grad_norm": 0.3464667680960837, |
|
"learning_rate": 2.242361470740549e-05, |
|
"loss": 0.2727, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 2.3884489986027013, |
|
"grad_norm": 0.38709489671449077, |
|
"learning_rate": 2.2358881408596583e-05, |
|
"loss": 0.2751, |
|
"step": 2565 |
|
}, |
|
{ |
|
"epoch": 2.393106660456451, |
|
"grad_norm": 0.35458794378570924, |
|
"learning_rate": 2.2294148109787676e-05, |
|
"loss": 0.2766, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 2.3977643223102003, |
|
"grad_norm": 0.31586159359748556, |
|
"learning_rate": 2.222941481097877e-05, |
|
"loss": 0.2747, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 2.40242198416395, |
|
"grad_norm": 0.47950668564649523, |
|
"learning_rate": 2.2164681512169862e-05, |
|
"loss": 0.2678, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 2.4070796460176993, |
|
"grad_norm": 0.3547033989457256, |
|
"learning_rate": 2.2099948213360955e-05, |
|
"loss": 0.2714, |
|
"step": 2585 |
|
}, |
|
{ |
|
"epoch": 2.4117373078714484, |
|
"grad_norm": 0.3417274119177762, |
|
"learning_rate": 2.2035214914552048e-05, |
|
"loss": 0.2656, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 2.416394969725198, |
|
"grad_norm": 0.41003338636699, |
|
"learning_rate": 2.1970481615743137e-05, |
|
"loss": 0.2814, |
|
"step": 2595 |
|
}, |
|
{ |
|
"epoch": 2.4210526315789473, |
|
"grad_norm": 0.3344591203299114, |
|
"learning_rate": 2.1905748316934234e-05, |
|
"loss": 0.2773, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 2.425710293432697, |
|
"grad_norm": 0.3688753394872757, |
|
"learning_rate": 2.1841015018125323e-05, |
|
"loss": 0.2714, |
|
"step": 2605 |
|
}, |
|
{ |
|
"epoch": 2.4303679552864463, |
|
"grad_norm": 0.4134992480901522, |
|
"learning_rate": 2.177628171931642e-05, |
|
"loss": 0.2688, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 2.4350256171401954, |
|
"grad_norm": 0.354197807356868, |
|
"learning_rate": 2.171154842050751e-05, |
|
"loss": 0.2695, |
|
"step": 2615 |
|
}, |
|
{ |
|
"epoch": 2.439683278993945, |
|
"grad_norm": 0.384555222770943, |
|
"learning_rate": 2.1646815121698602e-05, |
|
"loss": 0.2724, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 2.4443409408476944, |
|
"grad_norm": 0.33945672656475084, |
|
"learning_rate": 2.1582081822889695e-05, |
|
"loss": 0.2672, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 2.448998602701444, |
|
"grad_norm": 0.35223398070317613, |
|
"learning_rate": 2.151734852408079e-05, |
|
"loss": 0.266, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 2.4536562645551934, |
|
"grad_norm": 0.33538981080275043, |
|
"learning_rate": 2.145261522527188e-05, |
|
"loss": 0.2752, |
|
"step": 2635 |
|
}, |
|
{ |
|
"epoch": 2.458313926408943, |
|
"grad_norm": 1.7424507890291698, |
|
"learning_rate": 2.1387881926462974e-05, |
|
"loss": 0.2756, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 2.4629715882626924, |
|
"grad_norm": 0.36857578732146257, |
|
"learning_rate": 2.1323148627654067e-05, |
|
"loss": 0.2753, |
|
"step": 2645 |
|
}, |
|
{ |
|
"epoch": 2.4676292501164414, |
|
"grad_norm": 0.37901471619034555, |
|
"learning_rate": 2.1258415328845157e-05, |
|
"loss": 0.2708, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 2.472286911970191, |
|
"grad_norm": 0.3728918334905263, |
|
"learning_rate": 2.1193682030036253e-05, |
|
"loss": 0.2721, |
|
"step": 2655 |
|
}, |
|
{ |
|
"epoch": 2.4769445738239404, |
|
"grad_norm": 0.3349864522210704, |
|
"learning_rate": 2.1128948731227343e-05, |
|
"loss": 0.2701, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 2.48160223567769, |
|
"grad_norm": 0.3221018339130229, |
|
"learning_rate": 2.106421543241844e-05, |
|
"loss": 0.2714, |
|
"step": 2665 |
|
}, |
|
{ |
|
"epoch": 2.4862598975314394, |
|
"grad_norm": 0.372009777674902, |
|
"learning_rate": 2.099948213360953e-05, |
|
"loss": 0.2712, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 2.4909175593851884, |
|
"grad_norm": 0.31144393603273973, |
|
"learning_rate": 2.0934748834800622e-05, |
|
"loss": 0.2686, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 2.495575221238938, |
|
"grad_norm": 0.37310898554154925, |
|
"learning_rate": 2.0870015535991715e-05, |
|
"loss": 0.268, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 2.5002328830926874, |
|
"grad_norm": 0.32429912612731604, |
|
"learning_rate": 2.0805282237182808e-05, |
|
"loss": 0.2722, |
|
"step": 2685 |
|
}, |
|
{ |
|
"epoch": 2.504890544946437, |
|
"grad_norm": 0.3645155853567906, |
|
"learning_rate": 2.07405489383739e-05, |
|
"loss": 0.2625, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 2.5095482068001864, |
|
"grad_norm": 0.37285481357452527, |
|
"learning_rate": 2.0675815639564994e-05, |
|
"loss": 0.269, |
|
"step": 2695 |
|
}, |
|
{ |
|
"epoch": 2.514205868653936, |
|
"grad_norm": 0.4059330000248755, |
|
"learning_rate": 2.0611082340756087e-05, |
|
"loss": 0.2647, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 2.5188635305076854, |
|
"grad_norm": 0.3436088518040214, |
|
"learning_rate": 2.054634904194718e-05, |
|
"loss": 0.2742, |
|
"step": 2705 |
|
}, |
|
{ |
|
"epoch": 2.5235211923614345, |
|
"grad_norm": 0.3972933011802129, |
|
"learning_rate": 2.0481615743138273e-05, |
|
"loss": 0.269, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 2.528178854215184, |
|
"grad_norm": 0.37360697308076835, |
|
"learning_rate": 2.0416882444329362e-05, |
|
"loss": 0.2742, |
|
"step": 2715 |
|
}, |
|
{ |
|
"epoch": 2.5328365160689335, |
|
"grad_norm": 0.33376962508013164, |
|
"learning_rate": 2.035214914552046e-05, |
|
"loss": 0.2788, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 2.537494177922683, |
|
"grad_norm": 0.3907774958559364, |
|
"learning_rate": 2.028741584671155e-05, |
|
"loss": 0.2749, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 2.542151839776432, |
|
"grad_norm": 0.36884223475006717, |
|
"learning_rate": 2.022268254790264e-05, |
|
"loss": 0.2826, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 2.5468095016301815, |
|
"grad_norm": 0.37074560443330334, |
|
"learning_rate": 2.0157949249093734e-05, |
|
"loss": 0.2695, |
|
"step": 2735 |
|
}, |
|
{ |
|
"epoch": 2.551467163483931, |
|
"grad_norm": 0.3160412629320368, |
|
"learning_rate": 2.0093215950284827e-05, |
|
"loss": 0.2679, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 2.5561248253376805, |
|
"grad_norm": 0.3034071644528673, |
|
"learning_rate": 2.002848265147592e-05, |
|
"loss": 0.2698, |
|
"step": 2745 |
|
}, |
|
{ |
|
"epoch": 2.56078248719143, |
|
"grad_norm": 0.31258186729345505, |
|
"learning_rate": 1.9963749352667013e-05, |
|
"loss": 0.2679, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 2.5654401490451795, |
|
"grad_norm": 0.292440405711894, |
|
"learning_rate": 1.9899016053858106e-05, |
|
"loss": 0.2684, |
|
"step": 2755 |
|
}, |
|
{ |
|
"epoch": 2.570097810898929, |
|
"grad_norm": 0.3363867700320085, |
|
"learning_rate": 1.98342827550492e-05, |
|
"loss": 0.2711, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 2.574755472752678, |
|
"grad_norm": 0.33149638022316674, |
|
"learning_rate": 1.9769549456240292e-05, |
|
"loss": 0.2681, |
|
"step": 2765 |
|
}, |
|
{ |
|
"epoch": 2.5794131346064275, |
|
"grad_norm": 0.34029720041256184, |
|
"learning_rate": 1.9704816157431382e-05, |
|
"loss": 0.2694, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 2.584070796460177, |
|
"grad_norm": 0.3279656882775838, |
|
"learning_rate": 1.9640082858622478e-05, |
|
"loss": 0.2671, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 2.5887284583139265, |
|
"grad_norm": 0.3447709556024024, |
|
"learning_rate": 1.9575349559813568e-05, |
|
"loss": 0.2757, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 2.5933861201676756, |
|
"grad_norm": 0.3106673292073178, |
|
"learning_rate": 1.951061626100466e-05, |
|
"loss": 0.2752, |
|
"step": 2785 |
|
}, |
|
{ |
|
"epoch": 2.598043782021425, |
|
"grad_norm": 0.3105299991093135, |
|
"learning_rate": 1.9445882962195754e-05, |
|
"loss": 0.2771, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 2.6027014438751745, |
|
"grad_norm": 0.2982018872389286, |
|
"learning_rate": 1.9381149663386847e-05, |
|
"loss": 0.2711, |
|
"step": 2795 |
|
}, |
|
{ |
|
"epoch": 2.607359105728924, |
|
"grad_norm": 0.31038185590696776, |
|
"learning_rate": 1.931641636457794e-05, |
|
"loss": 0.2719, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.6120167675826735, |
|
"grad_norm": 0.4576009994202879, |
|
"learning_rate": 1.9251683065769033e-05, |
|
"loss": 0.272, |
|
"step": 2805 |
|
}, |
|
{ |
|
"epoch": 2.616674429436423, |
|
"grad_norm": 0.38717499530156124, |
|
"learning_rate": 1.9186949766960126e-05, |
|
"loss": 0.272, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 2.6213320912901725, |
|
"grad_norm": 0.3134007638188876, |
|
"learning_rate": 1.912221646815122e-05, |
|
"loss": 0.2679, |
|
"step": 2815 |
|
}, |
|
{ |
|
"epoch": 2.625989753143922, |
|
"grad_norm": 0.3662008822975538, |
|
"learning_rate": 1.9057483169342312e-05, |
|
"loss": 0.275, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 2.630647414997671, |
|
"grad_norm": 0.338465603582536, |
|
"learning_rate": 1.89927498705334e-05, |
|
"loss": 0.275, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 2.6353050768514206, |
|
"grad_norm": 0.39664923263948576, |
|
"learning_rate": 1.8928016571724498e-05, |
|
"loss": 0.2696, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 2.63996273870517, |
|
"grad_norm": 0.3358218724089584, |
|
"learning_rate": 1.8863283272915587e-05, |
|
"loss": 0.277, |
|
"step": 2835 |
|
}, |
|
{ |
|
"epoch": 2.6446204005589196, |
|
"grad_norm": 0.39806694330638576, |
|
"learning_rate": 1.879854997410668e-05, |
|
"loss": 0.2801, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 2.6492780624126686, |
|
"grad_norm": 0.3426052650712918, |
|
"learning_rate": 1.8733816675297773e-05, |
|
"loss": 0.2702, |
|
"step": 2845 |
|
}, |
|
{ |
|
"epoch": 2.653935724266418, |
|
"grad_norm": 0.321019545867809, |
|
"learning_rate": 1.8669083376488866e-05, |
|
"loss": 0.2769, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 2.6585933861201676, |
|
"grad_norm": 0.32384371615715174, |
|
"learning_rate": 1.860435007767996e-05, |
|
"loss": 0.2678, |
|
"step": 2855 |
|
}, |
|
{ |
|
"epoch": 2.663251047973917, |
|
"grad_norm": 0.3290749039774749, |
|
"learning_rate": 1.8539616778871052e-05, |
|
"loss": 0.273, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 2.6679087098276666, |
|
"grad_norm": 0.33856866467686797, |
|
"learning_rate": 1.8474883480062145e-05, |
|
"loss": 0.2725, |
|
"step": 2865 |
|
}, |
|
{ |
|
"epoch": 2.672566371681416, |
|
"grad_norm": 0.3500432840827306, |
|
"learning_rate": 1.8410150181253238e-05, |
|
"loss": 0.2731, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 2.6772240335351656, |
|
"grad_norm": 0.38586231997017223, |
|
"learning_rate": 1.834541688244433e-05, |
|
"loss": 0.2719, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 2.6818816953889146, |
|
"grad_norm": 0.31604905067500316, |
|
"learning_rate": 1.828068358363542e-05, |
|
"loss": 0.2713, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 2.686539357242664, |
|
"grad_norm": 0.34688343054005744, |
|
"learning_rate": 1.8215950284826517e-05, |
|
"loss": 0.2793, |
|
"step": 2885 |
|
}, |
|
{ |
|
"epoch": 2.6911970190964136, |
|
"grad_norm": 0.30974497872868545, |
|
"learning_rate": 1.8151216986017607e-05, |
|
"loss": 0.2691, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 2.695854680950163, |
|
"grad_norm": 0.38316570331805355, |
|
"learning_rate": 1.8086483687208703e-05, |
|
"loss": 0.2678, |
|
"step": 2895 |
|
}, |
|
{ |
|
"epoch": 2.700512342803912, |
|
"grad_norm": 0.33852769233141367, |
|
"learning_rate": 1.8021750388399793e-05, |
|
"loss": 0.2726, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 2.7051700046576617, |
|
"grad_norm": 0.3593989177077844, |
|
"learning_rate": 1.7957017089590886e-05, |
|
"loss": 0.2641, |
|
"step": 2905 |
|
}, |
|
{ |
|
"epoch": 2.709827666511411, |
|
"grad_norm": 0.30875552983332166, |
|
"learning_rate": 1.789228379078198e-05, |
|
"loss": 0.2673, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 2.7144853283651607, |
|
"grad_norm": 0.35727081049388676, |
|
"learning_rate": 1.7827550491973072e-05, |
|
"loss": 0.277, |
|
"step": 2915 |
|
}, |
|
{ |
|
"epoch": 2.71914299021891, |
|
"grad_norm": 0.3339865640541083, |
|
"learning_rate": 1.7762817193164165e-05, |
|
"loss": 0.2685, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 2.7238006520726596, |
|
"grad_norm": 0.33169965373972965, |
|
"learning_rate": 1.7698083894355258e-05, |
|
"loss": 0.2756, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 2.728458313926409, |
|
"grad_norm": 0.3157634299132718, |
|
"learning_rate": 1.763335059554635e-05, |
|
"loss": 0.2774, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 2.7331159757801586, |
|
"grad_norm": 0.3396944635203724, |
|
"learning_rate": 1.756861729673744e-05, |
|
"loss": 0.2728, |
|
"step": 2935 |
|
}, |
|
{ |
|
"epoch": 2.7377736376339077, |
|
"grad_norm": 0.35200393420014703, |
|
"learning_rate": 1.7503883997928537e-05, |
|
"loss": 0.2725, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 2.742431299487657, |
|
"grad_norm": 0.37998854310284846, |
|
"learning_rate": 1.7439150699119626e-05, |
|
"loss": 0.2776, |
|
"step": 2945 |
|
}, |
|
{ |
|
"epoch": 2.7470889613414067, |
|
"grad_norm": 0.3576281147469936, |
|
"learning_rate": 1.7374417400310723e-05, |
|
"loss": 0.2668, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 2.751746623195156, |
|
"grad_norm": 0.3635900744428691, |
|
"learning_rate": 1.7309684101501812e-05, |
|
"loss": 0.272, |
|
"step": 2955 |
|
}, |
|
{ |
|
"epoch": 2.7564042850489052, |
|
"grad_norm": 0.33511685467814833, |
|
"learning_rate": 1.7244950802692905e-05, |
|
"loss": 0.2717, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 2.7610619469026547, |
|
"grad_norm": 0.3497985322649408, |
|
"learning_rate": 1.7180217503883998e-05, |
|
"loss": 0.2734, |
|
"step": 2965 |
|
}, |
|
{ |
|
"epoch": 2.765719608756404, |
|
"grad_norm": 0.353483359237389, |
|
"learning_rate": 1.711548420507509e-05, |
|
"loss": 0.2753, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 2.7703772706101537, |
|
"grad_norm": 0.3721193002897029, |
|
"learning_rate": 1.7050750906266184e-05, |
|
"loss": 0.2712, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 2.775034932463903, |
|
"grad_norm": 0.3312140027398143, |
|
"learning_rate": 1.6986017607457277e-05, |
|
"loss": 0.2692, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 2.7796925943176527, |
|
"grad_norm": 0.39640844681767, |
|
"learning_rate": 1.692128430864837e-05, |
|
"loss": 0.2768, |
|
"step": 2985 |
|
}, |
|
{ |
|
"epoch": 2.784350256171402, |
|
"grad_norm": 0.3175338400111014, |
|
"learning_rate": 1.6856551009839463e-05, |
|
"loss": 0.2716, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 2.7890079180251512, |
|
"grad_norm": 0.31652286586007294, |
|
"learning_rate": 1.6791817711030556e-05, |
|
"loss": 0.2685, |
|
"step": 2995 |
|
}, |
|
{ |
|
"epoch": 2.7936655798789007, |
|
"grad_norm": 0.28761607644050446, |
|
"learning_rate": 1.6727084412221646e-05, |
|
"loss": 0.2755, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.7983232417326502, |
|
"grad_norm": 0.3099040855305692, |
|
"learning_rate": 1.6662351113412742e-05, |
|
"loss": 0.2693, |
|
"step": 3005 |
|
}, |
|
{ |
|
"epoch": 2.8029809035863997, |
|
"grad_norm": 0.3553610602893777, |
|
"learning_rate": 1.6597617814603832e-05, |
|
"loss": 0.2765, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 2.807638565440149, |
|
"grad_norm": 0.31104188100335034, |
|
"learning_rate": 1.6532884515794925e-05, |
|
"loss": 0.2596, |
|
"step": 3015 |
|
}, |
|
{ |
|
"epoch": 2.8122962272938983, |
|
"grad_norm": 1.220512975098523, |
|
"learning_rate": 1.6468151216986018e-05, |
|
"loss": 0.2753, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 2.8169538891476478, |
|
"grad_norm": 0.3631142231781718, |
|
"learning_rate": 1.640341791817711e-05, |
|
"loss": 0.2712, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 2.8216115510013973, |
|
"grad_norm": 0.30471054985328155, |
|
"learning_rate": 1.6338684619368204e-05, |
|
"loss": 0.2713, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 2.8262692128551468, |
|
"grad_norm": 0.3372934949628952, |
|
"learning_rate": 1.6273951320559297e-05, |
|
"loss": 0.2745, |
|
"step": 3035 |
|
}, |
|
{ |
|
"epoch": 2.8309268747088963, |
|
"grad_norm": 0.3077738977129297, |
|
"learning_rate": 1.620921802175039e-05, |
|
"loss": 0.2794, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 2.8355845365626458, |
|
"grad_norm": 0.3009609913128321, |
|
"learning_rate": 1.6144484722941483e-05, |
|
"loss": 0.2706, |
|
"step": 3045 |
|
}, |
|
{ |
|
"epoch": 2.840242198416395, |
|
"grad_norm": 0.30480428019972094, |
|
"learning_rate": 1.6079751424132576e-05, |
|
"loss": 0.2674, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 2.8448998602701443, |
|
"grad_norm": 0.38749267527788284, |
|
"learning_rate": 1.6015018125323665e-05, |
|
"loss": 0.2763, |
|
"step": 3055 |
|
}, |
|
{ |
|
"epoch": 2.849557522123894, |
|
"grad_norm": 0.2810429589384943, |
|
"learning_rate": 1.595028482651476e-05, |
|
"loss": 0.2789, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 2.8542151839776433, |
|
"grad_norm": 0.30523685729564104, |
|
"learning_rate": 1.588555152770585e-05, |
|
"loss": 0.2737, |
|
"step": 3065 |
|
}, |
|
{ |
|
"epoch": 2.858872845831393, |
|
"grad_norm": 0.35926120588650146, |
|
"learning_rate": 1.5820818228896944e-05, |
|
"loss": 0.2643, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 2.863530507685142, |
|
"grad_norm": 0.3313225013919279, |
|
"learning_rate": 1.5756084930088037e-05, |
|
"loss": 0.2724, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 2.8681881695388913, |
|
"grad_norm": 0.3244478248567013, |
|
"learning_rate": 1.569135163127913e-05, |
|
"loss": 0.2626, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 2.872845831392641, |
|
"grad_norm": 0.28666274499426153, |
|
"learning_rate": 1.5626618332470223e-05, |
|
"loss": 0.2696, |
|
"step": 3085 |
|
}, |
|
{ |
|
"epoch": 2.8775034932463903, |
|
"grad_norm": 0.35856467204541537, |
|
"learning_rate": 1.5561885033661316e-05, |
|
"loss": 0.2767, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 2.88216115510014, |
|
"grad_norm": 0.3245668012785978, |
|
"learning_rate": 1.549715173485241e-05, |
|
"loss": 0.2677, |
|
"step": 3095 |
|
}, |
|
{ |
|
"epoch": 2.8868188169538893, |
|
"grad_norm": 0.35666892295965125, |
|
"learning_rate": 1.5432418436043502e-05, |
|
"loss": 0.2733, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 2.891476478807639, |
|
"grad_norm": 0.3356911176206139, |
|
"learning_rate": 1.5367685137234595e-05, |
|
"loss": 0.2749, |
|
"step": 3105 |
|
}, |
|
{ |
|
"epoch": 2.896134140661388, |
|
"grad_norm": 0.29808214193997173, |
|
"learning_rate": 1.5302951838425685e-05, |
|
"loss": 0.2658, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 2.9007918025151374, |
|
"grad_norm": 0.3365027074012271, |
|
"learning_rate": 1.5238218539616781e-05, |
|
"loss": 0.2638, |
|
"step": 3115 |
|
}, |
|
{ |
|
"epoch": 2.905449464368887, |
|
"grad_norm": 0.31649157528469446, |
|
"learning_rate": 1.5173485240807872e-05, |
|
"loss": 0.2713, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 2.9101071262226363, |
|
"grad_norm": 0.39941610710037784, |
|
"learning_rate": 1.5108751941998964e-05, |
|
"loss": 0.2728, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 2.9147647880763854, |
|
"grad_norm": 0.3350365625256805, |
|
"learning_rate": 1.5044018643190058e-05, |
|
"loss": 0.2636, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 2.919422449930135, |
|
"grad_norm": 0.3121317275501377, |
|
"learning_rate": 1.497928534438115e-05, |
|
"loss": 0.2683, |
|
"step": 3135 |
|
}, |
|
{ |
|
"epoch": 2.9240801117838844, |
|
"grad_norm": 0.32992397885946134, |
|
"learning_rate": 1.4914552045572244e-05, |
|
"loss": 0.2771, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 2.928737773637634, |
|
"grad_norm": 0.3104785612542341, |
|
"learning_rate": 1.4849818746763336e-05, |
|
"loss": 0.2802, |
|
"step": 3145 |
|
}, |
|
{ |
|
"epoch": 2.9333954354913834, |
|
"grad_norm": 0.3284169515856172, |
|
"learning_rate": 1.4785085447954427e-05, |
|
"loss": 0.2717, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 2.938053097345133, |
|
"grad_norm": 0.5149080066378856, |
|
"learning_rate": 1.4720352149145522e-05, |
|
"loss": 0.2696, |
|
"step": 3155 |
|
}, |
|
{ |
|
"epoch": 2.9427107591988824, |
|
"grad_norm": 0.33286678134806846, |
|
"learning_rate": 1.4655618850336613e-05, |
|
"loss": 0.2715, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 2.9473684210526314, |
|
"grad_norm": 0.29991185736571746, |
|
"learning_rate": 1.4590885551527706e-05, |
|
"loss": 0.2706, |
|
"step": 3165 |
|
}, |
|
{ |
|
"epoch": 2.952026082906381, |
|
"grad_norm": 0.2981828082571127, |
|
"learning_rate": 1.45261522527188e-05, |
|
"loss": 0.2692, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 2.9566837447601304, |
|
"grad_norm": 0.30858159031175714, |
|
"learning_rate": 1.4461418953909892e-05, |
|
"loss": 0.2723, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 2.96134140661388, |
|
"grad_norm": 0.34811263846999796, |
|
"learning_rate": 1.4396685655100987e-05, |
|
"loss": 0.2701, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 2.9659990684676294, |
|
"grad_norm": 0.3873651952945082, |
|
"learning_rate": 1.4331952356292078e-05, |
|
"loss": 0.2688, |
|
"step": 3185 |
|
}, |
|
{ |
|
"epoch": 2.9706567303213784, |
|
"grad_norm": 0.3522941439771578, |
|
"learning_rate": 1.4267219057483169e-05, |
|
"loss": 0.2634, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 2.975314392175128, |
|
"grad_norm": 0.34341967428607423, |
|
"learning_rate": 1.4202485758674264e-05, |
|
"loss": 0.2753, |
|
"step": 3195 |
|
}, |
|
{ |
|
"epoch": 2.9799720540288774, |
|
"grad_norm": 0.3284918143676433, |
|
"learning_rate": 1.4137752459865355e-05, |
|
"loss": 0.2725, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 2.984629715882627, |
|
"grad_norm": 0.3150251773196763, |
|
"learning_rate": 1.4073019161056446e-05, |
|
"loss": 0.2729, |
|
"step": 3205 |
|
}, |
|
{ |
|
"epoch": 2.9892873777363764, |
|
"grad_norm": 0.3115665963455036, |
|
"learning_rate": 1.4008285862247541e-05, |
|
"loss": 0.2587, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 2.993945039590126, |
|
"grad_norm": 0.2930991072668908, |
|
"learning_rate": 1.3943552563438634e-05, |
|
"loss": 0.2585, |
|
"step": 3215 |
|
}, |
|
{ |
|
"epoch": 2.9986027014438754, |
|
"grad_norm": 0.3122070563393351, |
|
"learning_rate": 1.3878819264629725e-05, |
|
"loss": 0.2753, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 3.0027945971122496, |
|
"grad_norm": 0.3304819813611403, |
|
"learning_rate": 1.381408596582082e-05, |
|
"loss": 0.2101, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 3.007452258965999, |
|
"grad_norm": 0.37549362163456124, |
|
"learning_rate": 1.3749352667011911e-05, |
|
"loss": 0.183, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 3.0121099208197486, |
|
"grad_norm": 0.3748665980288928, |
|
"learning_rate": 1.3684619368203006e-05, |
|
"loss": 0.179, |
|
"step": 3235 |
|
}, |
|
{ |
|
"epoch": 3.016767582673498, |
|
"grad_norm": 0.3134994608529235, |
|
"learning_rate": 1.3619886069394097e-05, |
|
"loss": 0.1742, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 3.021425244527247, |
|
"grad_norm": 0.3056502084704337, |
|
"learning_rate": 1.3555152770585189e-05, |
|
"loss": 0.1806, |
|
"step": 3245 |
|
}, |
|
{ |
|
"epoch": 3.0260829063809966, |
|
"grad_norm": 0.31098559463773384, |
|
"learning_rate": 1.3490419471776283e-05, |
|
"loss": 0.1759, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 3.030740568234746, |
|
"grad_norm": 0.3156350772890806, |
|
"learning_rate": 1.3425686172967375e-05, |
|
"loss": 0.1796, |
|
"step": 3255 |
|
}, |
|
{ |
|
"epoch": 3.0353982300884956, |
|
"grad_norm": 0.34419970542768846, |
|
"learning_rate": 1.3360952874158466e-05, |
|
"loss": 0.1804, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 3.040055891942245, |
|
"grad_norm": 0.3199306178559233, |
|
"learning_rate": 1.329621957534956e-05, |
|
"loss": 0.1796, |
|
"step": 3265 |
|
}, |
|
{ |
|
"epoch": 3.0447135537959946, |
|
"grad_norm": 0.3153751181374002, |
|
"learning_rate": 1.3231486276540654e-05, |
|
"loss": 0.1782, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 3.0493712156497437, |
|
"grad_norm": 0.3079364441771498, |
|
"learning_rate": 1.3166752977731747e-05, |
|
"loss": 0.1805, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 3.054028877503493, |
|
"grad_norm": 0.28739311203736595, |
|
"learning_rate": 1.310201967892284e-05, |
|
"loss": 0.174, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 3.0586865393572427, |
|
"grad_norm": 0.2984939778181826, |
|
"learning_rate": 1.303728638011393e-05, |
|
"loss": 0.1823, |
|
"step": 3285 |
|
}, |
|
{ |
|
"epoch": 3.063344201210992, |
|
"grad_norm": 0.2929108597306291, |
|
"learning_rate": 1.2972553081305026e-05, |
|
"loss": 0.1772, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 3.0680018630647417, |
|
"grad_norm": 0.28458008501674803, |
|
"learning_rate": 1.2907819782496117e-05, |
|
"loss": 0.1759, |
|
"step": 3295 |
|
}, |
|
{ |
|
"epoch": 3.0726595249184907, |
|
"grad_norm": 0.292581882013171, |
|
"learning_rate": 1.2843086483687208e-05, |
|
"loss": 0.1721, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 3.07731718677224, |
|
"grad_norm": 0.2943365822596842, |
|
"learning_rate": 1.2778353184878303e-05, |
|
"loss": 0.1751, |
|
"step": 3305 |
|
}, |
|
{ |
|
"epoch": 3.0819748486259897, |
|
"grad_norm": 0.29377045603091445, |
|
"learning_rate": 1.2713619886069394e-05, |
|
"loss": 0.1723, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 3.086632510479739, |
|
"grad_norm": 0.3851561974798118, |
|
"learning_rate": 1.2648886587260485e-05, |
|
"loss": 0.1774, |
|
"step": 3315 |
|
}, |
|
{ |
|
"epoch": 3.0912901723334887, |
|
"grad_norm": 0.27567094909537965, |
|
"learning_rate": 1.258415328845158e-05, |
|
"loss": 0.1725, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 3.095947834187238, |
|
"grad_norm": 0.29697155443114787, |
|
"learning_rate": 1.2519419989642673e-05, |
|
"loss": 0.1817, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 3.1006054960409872, |
|
"grad_norm": 0.6608481087066058, |
|
"learning_rate": 1.2454686690833766e-05, |
|
"loss": 0.1775, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 3.1052631578947367, |
|
"grad_norm": 0.29223487514964575, |
|
"learning_rate": 1.2389953392024859e-05, |
|
"loss": 0.176, |
|
"step": 3335 |
|
}, |
|
{ |
|
"epoch": 3.109920819748486, |
|
"grad_norm": 0.29753580371777344, |
|
"learning_rate": 1.2325220093215952e-05, |
|
"loss": 0.1757, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 3.1145784816022357, |
|
"grad_norm": 0.28709197068860887, |
|
"learning_rate": 1.2260486794407043e-05, |
|
"loss": 0.1784, |
|
"step": 3345 |
|
}, |
|
{ |
|
"epoch": 3.119236143455985, |
|
"grad_norm": 0.29533810100485525, |
|
"learning_rate": 1.2195753495598136e-05, |
|
"loss": 0.1853, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 3.1238938053097347, |
|
"grad_norm": 0.3018378671525719, |
|
"learning_rate": 1.213102019678923e-05, |
|
"loss": 0.1755, |
|
"step": 3355 |
|
}, |
|
{ |
|
"epoch": 3.1285514671634838, |
|
"grad_norm": 0.2891240704748271, |
|
"learning_rate": 1.2066286897980322e-05, |
|
"loss": 0.177, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 3.1332091290172333, |
|
"grad_norm": 0.309067605426475, |
|
"learning_rate": 1.2001553599171414e-05, |
|
"loss": 0.1824, |
|
"step": 3365 |
|
}, |
|
{ |
|
"epoch": 3.1378667908709827, |
|
"grad_norm": 0.2766369819985356, |
|
"learning_rate": 1.1936820300362507e-05, |
|
"loss": 0.1784, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 3.1425244527247322, |
|
"grad_norm": 0.2979597544234227, |
|
"learning_rate": 1.18720870015536e-05, |
|
"loss": 0.1784, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 3.1471821145784817, |
|
"grad_norm": 0.33228924530041026, |
|
"learning_rate": 1.1807353702744693e-05, |
|
"loss": 0.1714, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 3.1518397764322312, |
|
"grad_norm": 0.28703822230981474, |
|
"learning_rate": 1.1742620403935786e-05, |
|
"loss": 0.1767, |
|
"step": 3385 |
|
}, |
|
{ |
|
"epoch": 3.1564974382859803, |
|
"grad_norm": 0.3121502606416562, |
|
"learning_rate": 1.1677887105126878e-05, |
|
"loss": 0.1767, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 3.16115510013973, |
|
"grad_norm": 0.28866525138955385, |
|
"learning_rate": 1.1613153806317971e-05, |
|
"loss": 0.1733, |
|
"step": 3395 |
|
}, |
|
{ |
|
"epoch": 3.1658127619934793, |
|
"grad_norm": 0.292655197435037, |
|
"learning_rate": 1.1548420507509063e-05, |
|
"loss": 0.1733, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 3.1704704238472288, |
|
"grad_norm": 0.2959791687636977, |
|
"learning_rate": 1.1483687208700156e-05, |
|
"loss": 0.1758, |
|
"step": 3405 |
|
}, |
|
{ |
|
"epoch": 3.1751280857009783, |
|
"grad_norm": 0.2982918136803687, |
|
"learning_rate": 1.1418953909891249e-05, |
|
"loss": 0.1836, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 3.1797857475547273, |
|
"grad_norm": 0.30298460591386067, |
|
"learning_rate": 1.1354220611082342e-05, |
|
"loss": 0.1751, |
|
"step": 3415 |
|
}, |
|
{ |
|
"epoch": 3.184443409408477, |
|
"grad_norm": 0.31276656779659445, |
|
"learning_rate": 1.1289487312273433e-05, |
|
"loss": 0.1811, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 3.1891010712622263, |
|
"grad_norm": 0.29832050835385665, |
|
"learning_rate": 1.1224754013464526e-05, |
|
"loss": 0.1795, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 3.193758733115976, |
|
"grad_norm": 0.30514056117416016, |
|
"learning_rate": 1.1160020714655619e-05, |
|
"loss": 0.1805, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 3.1984163949697253, |
|
"grad_norm": 0.3029193167142536, |
|
"learning_rate": 1.1095287415846712e-05, |
|
"loss": 0.181, |
|
"step": 3435 |
|
}, |
|
{ |
|
"epoch": 3.203074056823475, |
|
"grad_norm": 0.30746488264340455, |
|
"learning_rate": 1.1030554117037805e-05, |
|
"loss": 0.1811, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 3.207731718677224, |
|
"grad_norm": 0.2914819757567237, |
|
"learning_rate": 1.0965820818228898e-05, |
|
"loss": 0.1726, |
|
"step": 3445 |
|
}, |
|
{ |
|
"epoch": 3.2123893805309733, |
|
"grad_norm": 0.2886147432095147, |
|
"learning_rate": 1.0901087519419991e-05, |
|
"loss": 0.1775, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 3.217047042384723, |
|
"grad_norm": 0.29564777298103045, |
|
"learning_rate": 1.0836354220611084e-05, |
|
"loss": 0.1789, |
|
"step": 3455 |
|
}, |
|
{ |
|
"epoch": 3.2217047042384723, |
|
"grad_norm": 0.2892508127991348, |
|
"learning_rate": 1.0771620921802175e-05, |
|
"loss": 0.1778, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 3.226362366092222, |
|
"grad_norm": 0.28774906064692896, |
|
"learning_rate": 1.0706887622993268e-05, |
|
"loss": 0.176, |
|
"step": 3465 |
|
}, |
|
{ |
|
"epoch": 3.2310200279459713, |
|
"grad_norm": 0.30800900764013556, |
|
"learning_rate": 1.0642154324184361e-05, |
|
"loss": 0.1783, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 3.2356776897997204, |
|
"grad_norm": 0.2793667780450472, |
|
"learning_rate": 1.0577421025375453e-05, |
|
"loss": 0.1776, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 3.24033535165347, |
|
"grad_norm": 0.2960876247684058, |
|
"learning_rate": 1.0512687726566546e-05, |
|
"loss": 0.1765, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 3.2449930135072194, |
|
"grad_norm": 0.32930610906590374, |
|
"learning_rate": 1.0447954427757638e-05, |
|
"loss": 0.1816, |
|
"step": 3485 |
|
}, |
|
{ |
|
"epoch": 3.249650675360969, |
|
"grad_norm": 0.3159028312568502, |
|
"learning_rate": 1.0383221128948731e-05, |
|
"loss": 0.1814, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 3.2543083372147183, |
|
"grad_norm": 0.3037903738896479, |
|
"learning_rate": 1.0318487830139824e-05, |
|
"loss": 0.1873, |
|
"step": 3495 |
|
}, |
|
{ |
|
"epoch": 3.258965999068468, |
|
"grad_norm": 0.30690210747220553, |
|
"learning_rate": 1.0253754531330917e-05, |
|
"loss": 0.1792, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 3.263623660922217, |
|
"grad_norm": 0.2971216838179025, |
|
"learning_rate": 1.018902123252201e-05, |
|
"loss": 0.182, |
|
"step": 3505 |
|
}, |
|
{ |
|
"epoch": 3.2682813227759664, |
|
"grad_norm": 0.3047994804606214, |
|
"learning_rate": 1.0124287933713103e-05, |
|
"loss": 0.1802, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 3.272938984629716, |
|
"grad_norm": 0.29190460822979236, |
|
"learning_rate": 1.0059554634904195e-05, |
|
"loss": 0.1788, |
|
"step": 3515 |
|
}, |
|
{ |
|
"epoch": 3.2775966464834654, |
|
"grad_norm": 0.3046401879655743, |
|
"learning_rate": 9.994821336095288e-06, |
|
"loss": 0.1788, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 3.282254308337215, |
|
"grad_norm": 0.31991453702020917, |
|
"learning_rate": 9.93008803728638e-06, |
|
"loss": 0.1816, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 3.286911970190964, |
|
"grad_norm": 0.28860232259659196, |
|
"learning_rate": 9.865354738477474e-06, |
|
"loss": 0.1734, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 3.2915696320447134, |
|
"grad_norm": 0.28646435016895677, |
|
"learning_rate": 9.800621439668565e-06, |
|
"loss": 0.1729, |
|
"step": 3535 |
|
}, |
|
{ |
|
"epoch": 3.296227293898463, |
|
"grad_norm": 0.2878206575379541, |
|
"learning_rate": 9.735888140859658e-06, |
|
"loss": 0.1802, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 3.3008849557522124, |
|
"grad_norm": 0.278596793180263, |
|
"learning_rate": 9.671154842050751e-06, |
|
"loss": 0.1783, |
|
"step": 3545 |
|
}, |
|
{ |
|
"epoch": 3.305542617605962, |
|
"grad_norm": 0.29157821066173406, |
|
"learning_rate": 9.606421543241844e-06, |
|
"loss": 0.1753, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 3.3102002794597114, |
|
"grad_norm": 0.28868610667936917, |
|
"learning_rate": 9.541688244432937e-06, |
|
"loss": 0.1734, |
|
"step": 3555 |
|
}, |
|
{ |
|
"epoch": 3.3148579413134605, |
|
"grad_norm": 0.2829748445043861, |
|
"learning_rate": 9.47695494562403e-06, |
|
"loss": 0.1791, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 3.31951560316721, |
|
"grad_norm": 0.31936811621546346, |
|
"learning_rate": 9.412221646815123e-06, |
|
"loss": 0.1801, |
|
"step": 3565 |
|
}, |
|
{ |
|
"epoch": 3.3241732650209594, |
|
"grad_norm": 0.28502956960820763, |
|
"learning_rate": 9.347488348006214e-06, |
|
"loss": 0.1771, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 3.328830926874709, |
|
"grad_norm": 0.2990286469354999, |
|
"learning_rate": 9.282755049197307e-06, |
|
"loss": 0.1783, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 3.3334885887284584, |
|
"grad_norm": 0.3076827669029228, |
|
"learning_rate": 9.2180217503884e-06, |
|
"loss": 0.1762, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 3.3381462505822075, |
|
"grad_norm": 0.30032433256579627, |
|
"learning_rate": 9.153288451579493e-06, |
|
"loss": 0.1812, |
|
"step": 3585 |
|
}, |
|
{ |
|
"epoch": 3.342803912435957, |
|
"grad_norm": 0.2763729371529282, |
|
"learning_rate": 9.088555152770584e-06, |
|
"loss": 0.1756, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 3.3474615742897065, |
|
"grad_norm": 0.28665241347886994, |
|
"learning_rate": 9.023821853961677e-06, |
|
"loss": 0.1781, |
|
"step": 3595 |
|
}, |
|
{ |
|
"epoch": 3.352119236143456, |
|
"grad_norm": 0.2852578172845054, |
|
"learning_rate": 8.95908855515277e-06, |
|
"loss": 0.1766, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 3.3567768979972055, |
|
"grad_norm": 0.2999218538137478, |
|
"learning_rate": 8.894355256343863e-06, |
|
"loss": 0.1742, |
|
"step": 3605 |
|
}, |
|
{ |
|
"epoch": 3.361434559850955, |
|
"grad_norm": 0.2778982809107516, |
|
"learning_rate": 8.829621957534956e-06, |
|
"loss": 0.1766, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 3.3660922217047045, |
|
"grad_norm": 0.2870631712767666, |
|
"learning_rate": 8.76488865872605e-06, |
|
"loss": 0.1735, |
|
"step": 3615 |
|
}, |
|
{ |
|
"epoch": 3.3707498835584535, |
|
"grad_norm": 0.28630778046736993, |
|
"learning_rate": 8.700155359917142e-06, |
|
"loss": 0.1758, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 3.375407545412203, |
|
"grad_norm": 0.28571554107591934, |
|
"learning_rate": 8.635422061108235e-06, |
|
"loss": 0.1802, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 3.3800652072659525, |
|
"grad_norm": 0.2994504977989836, |
|
"learning_rate": 8.570688762299327e-06, |
|
"loss": 0.1808, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 3.384722869119702, |
|
"grad_norm": 0.28410097054733224, |
|
"learning_rate": 8.50595546349042e-06, |
|
"loss": 0.1808, |
|
"step": 3635 |
|
}, |
|
{ |
|
"epoch": 3.3893805309734515, |
|
"grad_norm": 0.28236954251213214, |
|
"learning_rate": 8.441222164681513e-06, |
|
"loss": 0.1741, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 3.3940381928272005, |
|
"grad_norm": 0.3084957965343309, |
|
"learning_rate": 8.376488865872606e-06, |
|
"loss": 0.1907, |
|
"step": 3645 |
|
}, |
|
{ |
|
"epoch": 3.39869585468095, |
|
"grad_norm": 0.29554611127145863, |
|
"learning_rate": 8.311755567063697e-06, |
|
"loss": 0.1788, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 3.4033535165346995, |
|
"grad_norm": 0.3144424173115396, |
|
"learning_rate": 8.24702226825479e-06, |
|
"loss": 0.1809, |
|
"step": 3655 |
|
}, |
|
{ |
|
"epoch": 3.408011178388449, |
|
"grad_norm": 0.2918858454691064, |
|
"learning_rate": 8.182288969445883e-06, |
|
"loss": 0.174, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 3.4126688402421985, |
|
"grad_norm": 0.2892152457631711, |
|
"learning_rate": 8.117555670636976e-06, |
|
"loss": 0.1763, |
|
"step": 3665 |
|
}, |
|
{ |
|
"epoch": 3.417326502095948, |
|
"grad_norm": 0.28373511140587954, |
|
"learning_rate": 8.052822371828069e-06, |
|
"loss": 0.1782, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 3.421984163949697, |
|
"grad_norm": 0.272525018346227, |
|
"learning_rate": 7.988089073019162e-06, |
|
"loss": 0.1728, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 3.4266418258034466, |
|
"grad_norm": 0.27049351592069754, |
|
"learning_rate": 7.923355774210255e-06, |
|
"loss": 0.1748, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 3.431299487657196, |
|
"grad_norm": 0.2908050514006356, |
|
"learning_rate": 7.858622475401346e-06, |
|
"loss": 0.1816, |
|
"step": 3685 |
|
}, |
|
{ |
|
"epoch": 3.4359571495109456, |
|
"grad_norm": 0.2771715804654571, |
|
"learning_rate": 7.793889176592439e-06, |
|
"loss": 0.1775, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 3.440614811364695, |
|
"grad_norm": 0.307408789296849, |
|
"learning_rate": 7.729155877783532e-06, |
|
"loss": 0.1803, |
|
"step": 3695 |
|
}, |
|
{ |
|
"epoch": 3.445272473218444, |
|
"grad_norm": 0.2896861472425251, |
|
"learning_rate": 7.664422578974625e-06, |
|
"loss": 0.1751, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 3.4499301350721936, |
|
"grad_norm": 0.279195615233317, |
|
"learning_rate": 7.599689280165717e-06, |
|
"loss": 0.1798, |
|
"step": 3705 |
|
}, |
|
{ |
|
"epoch": 3.454587796925943, |
|
"grad_norm": 0.29432038056377485, |
|
"learning_rate": 7.53495598135681e-06, |
|
"loss": 0.1828, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 3.4592454587796926, |
|
"grad_norm": 0.29060489107041276, |
|
"learning_rate": 7.470222682547903e-06, |
|
"loss": 0.1745, |
|
"step": 3715 |
|
}, |
|
{ |
|
"epoch": 3.463903120633442, |
|
"grad_norm": 0.28785175023856524, |
|
"learning_rate": 7.405489383738996e-06, |
|
"loss": 0.1774, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 3.4685607824871916, |
|
"grad_norm": 0.2824942037705507, |
|
"learning_rate": 7.340756084930088e-06, |
|
"loss": 0.1798, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 3.473218444340941, |
|
"grad_norm": 0.2885091784401793, |
|
"learning_rate": 7.276022786121181e-06, |
|
"loss": 0.1791, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 3.47787610619469, |
|
"grad_norm": 0.2838658738954116, |
|
"learning_rate": 7.211289487312274e-06, |
|
"loss": 0.1746, |
|
"step": 3735 |
|
}, |
|
{ |
|
"epoch": 3.4825337680484396, |
|
"grad_norm": 0.28887842909118483, |
|
"learning_rate": 7.146556188503367e-06, |
|
"loss": 0.175, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 3.487191429902189, |
|
"grad_norm": 0.2843809341741585, |
|
"learning_rate": 7.081822889694459e-06, |
|
"loss": 0.177, |
|
"step": 3745 |
|
}, |
|
{ |
|
"epoch": 3.4918490917559386, |
|
"grad_norm": 0.28621203602989626, |
|
"learning_rate": 7.017089590885552e-06, |
|
"loss": 0.178, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 3.496506753609688, |
|
"grad_norm": 0.28912146796078997, |
|
"learning_rate": 6.952356292076645e-06, |
|
"loss": 0.1786, |
|
"step": 3755 |
|
}, |
|
{ |
|
"epoch": 3.501164415463437, |
|
"grad_norm": 0.28458889587226993, |
|
"learning_rate": 6.887622993267737e-06, |
|
"loss": 0.1755, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 3.5058220773171866, |
|
"grad_norm": 0.29550375520040006, |
|
"learning_rate": 6.82288969445883e-06, |
|
"loss": 0.173, |
|
"step": 3765 |
|
}, |
|
{ |
|
"epoch": 3.510479739170936, |
|
"grad_norm": 0.31286759510416146, |
|
"learning_rate": 6.758156395649923e-06, |
|
"loss": 0.174, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 3.5151374010246856, |
|
"grad_norm": 0.28829238017584236, |
|
"learning_rate": 6.693423096841016e-06, |
|
"loss": 0.1718, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 3.519795062878435, |
|
"grad_norm": 0.28945905000643063, |
|
"learning_rate": 6.628689798032108e-06, |
|
"loss": 0.1717, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 3.5244527247321846, |
|
"grad_norm": 0.29706213456606245, |
|
"learning_rate": 6.563956499223201e-06, |
|
"loss": 0.1767, |
|
"step": 3785 |
|
}, |
|
{ |
|
"epoch": 3.529110386585934, |
|
"grad_norm": 0.2697220597100272, |
|
"learning_rate": 6.499223200414294e-06, |
|
"loss": 0.1759, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 3.533768048439683, |
|
"grad_norm": 0.28625641383280975, |
|
"learning_rate": 6.434489901605387e-06, |
|
"loss": 0.1848, |
|
"step": 3795 |
|
}, |
|
{ |
|
"epoch": 3.5384257102934327, |
|
"grad_norm": 0.27959609788340817, |
|
"learning_rate": 6.369756602796478e-06, |
|
"loss": 0.1759, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 3.543083372147182, |
|
"grad_norm": 0.28228154116471305, |
|
"learning_rate": 6.305023303987571e-06, |
|
"loss": 0.1769, |
|
"step": 3805 |
|
}, |
|
{ |
|
"epoch": 3.5477410340009317, |
|
"grad_norm": 0.26985107405345043, |
|
"learning_rate": 6.240290005178664e-06, |
|
"loss": 0.1778, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 3.5523986958546807, |
|
"grad_norm": 0.27337149644653264, |
|
"learning_rate": 6.175556706369757e-06, |
|
"loss": 0.1772, |
|
"step": 3815 |
|
}, |
|
{ |
|
"epoch": 3.55705635770843, |
|
"grad_norm": 0.27104309189982073, |
|
"learning_rate": 6.11082340756085e-06, |
|
"loss": 0.1754, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 3.5617140195621797, |
|
"grad_norm": 0.30932575066266116, |
|
"learning_rate": 6.046090108751942e-06, |
|
"loss": 0.1776, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 3.566371681415929, |
|
"grad_norm": 0.29740580773652014, |
|
"learning_rate": 5.981356809943035e-06, |
|
"loss": 0.176, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 3.5710293432696787, |
|
"grad_norm": 0.276523784239008, |
|
"learning_rate": 5.916623511134127e-06, |
|
"loss": 0.1799, |
|
"step": 3835 |
|
}, |
|
{ |
|
"epoch": 3.575687005123428, |
|
"grad_norm": 0.29013154798571805, |
|
"learning_rate": 5.85189021232522e-06, |
|
"loss": 0.1768, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 3.5803446669771777, |
|
"grad_norm": 0.29252747153466957, |
|
"learning_rate": 5.787156913516313e-06, |
|
"loss": 0.1738, |
|
"step": 3845 |
|
}, |
|
{ |
|
"epoch": 3.5850023288309267, |
|
"grad_norm": 0.28051730805375724, |
|
"learning_rate": 5.7224236147074054e-06, |
|
"loss": 0.1756, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 3.5896599906846762, |
|
"grad_norm": 0.29837466182008804, |
|
"learning_rate": 5.6576903158984984e-06, |
|
"loss": 0.1806, |
|
"step": 3855 |
|
}, |
|
{ |
|
"epoch": 3.5943176525384257, |
|
"grad_norm": 0.2902254049349514, |
|
"learning_rate": 5.5929570170895906e-06, |
|
"loss": 0.1758, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 3.598975314392175, |
|
"grad_norm": 0.29690468932528485, |
|
"learning_rate": 5.5282237182806836e-06, |
|
"loss": 0.1812, |
|
"step": 3865 |
|
}, |
|
{ |
|
"epoch": 3.6036329762459243, |
|
"grad_norm": 0.41903251792225776, |
|
"learning_rate": 5.4634904194717765e-06, |
|
"loss": 0.1761, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 3.6082906380996738, |
|
"grad_norm": 0.2757754067100353, |
|
"learning_rate": 5.3987571206628695e-06, |
|
"loss": 0.1759, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 3.6129482999534233, |
|
"grad_norm": 0.28245805312769273, |
|
"learning_rate": 5.334023821853962e-06, |
|
"loss": 0.1736, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 3.6176059618071728, |
|
"grad_norm": 0.2664074642960957, |
|
"learning_rate": 5.269290523045055e-06, |
|
"loss": 0.1732, |
|
"step": 3885 |
|
}, |
|
{ |
|
"epoch": 3.6222636236609222, |
|
"grad_norm": 0.2736243477212168, |
|
"learning_rate": 5.204557224236147e-06, |
|
"loss": 0.1743, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 3.6269212855146717, |
|
"grad_norm": 0.2775685835752688, |
|
"learning_rate": 5.139823925427241e-06, |
|
"loss": 0.1754, |
|
"step": 3895 |
|
}, |
|
{ |
|
"epoch": 3.6315789473684212, |
|
"grad_norm": 0.26500655531124734, |
|
"learning_rate": 5.075090626618333e-06, |
|
"loss": 0.1719, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 3.6362366092221707, |
|
"grad_norm": 0.27561475493184445, |
|
"learning_rate": 5.010357327809426e-06, |
|
"loss": 0.1784, |
|
"step": 3905 |
|
}, |
|
{ |
|
"epoch": 3.64089427107592, |
|
"grad_norm": 0.28363025571766304, |
|
"learning_rate": 4.945624029000518e-06, |
|
"loss": 0.1711, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 3.6455519329296693, |
|
"grad_norm": 0.27578950261131024, |
|
"learning_rate": 4.880890730191611e-06, |
|
"loss": 0.1745, |
|
"step": 3915 |
|
}, |
|
{ |
|
"epoch": 3.6502095947834188, |
|
"grad_norm": 0.2689287311474901, |
|
"learning_rate": 4.816157431382704e-06, |
|
"loss": 0.1673, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 3.6548672566371683, |
|
"grad_norm": 0.2768899738432259, |
|
"learning_rate": 4.751424132573796e-06, |
|
"loss": 0.1668, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 3.6595249184909173, |
|
"grad_norm": 0.2762059518655408, |
|
"learning_rate": 4.686690833764889e-06, |
|
"loss": 0.1746, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 3.664182580344667, |
|
"grad_norm": 0.2854409744347062, |
|
"learning_rate": 4.621957534955981e-06, |
|
"loss": 0.1771, |
|
"step": 3935 |
|
}, |
|
{ |
|
"epoch": 3.6688402421984163, |
|
"grad_norm": 0.27071826247278635, |
|
"learning_rate": 4.557224236147074e-06, |
|
"loss": 0.1734, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 3.673497904052166, |
|
"grad_norm": 0.31409279940314927, |
|
"learning_rate": 4.492490937338166e-06, |
|
"loss": 0.173, |
|
"step": 3945 |
|
}, |
|
{ |
|
"epoch": 3.6781555659059153, |
|
"grad_norm": 0.2712537638578198, |
|
"learning_rate": 4.42775763852926e-06, |
|
"loss": 0.1729, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 3.682813227759665, |
|
"grad_norm": 0.2659242739993353, |
|
"learning_rate": 4.363024339720352e-06, |
|
"loss": 0.1781, |
|
"step": 3955 |
|
}, |
|
{ |
|
"epoch": 3.6874708896134143, |
|
"grad_norm": 0.2878431083722301, |
|
"learning_rate": 4.298291040911445e-06, |
|
"loss": 0.1755, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 3.6921285514671633, |
|
"grad_norm": 0.27004564279309073, |
|
"learning_rate": 4.233557742102537e-06, |
|
"loss": 0.1722, |
|
"step": 3965 |
|
}, |
|
{ |
|
"epoch": 3.696786213320913, |
|
"grad_norm": 0.2726539565276091, |
|
"learning_rate": 4.16882444329363e-06, |
|
"loss": 0.1778, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 3.7014438751746623, |
|
"grad_norm": 0.26268681246368897, |
|
"learning_rate": 4.104091144484723e-06, |
|
"loss": 0.1695, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 3.706101537028412, |
|
"grad_norm": 0.2760735546995598, |
|
"learning_rate": 4.039357845675816e-06, |
|
"loss": 0.1726, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 3.710759198882161, |
|
"grad_norm": 0.28424803977449115, |
|
"learning_rate": 3.9746245468669085e-06, |
|
"loss": 0.1799, |
|
"step": 3985 |
|
}, |
|
{ |
|
"epoch": 3.7154168607359104, |
|
"grad_norm": 0.2941948305095177, |
|
"learning_rate": 3.9098912480580015e-06, |
|
"loss": 0.1706, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 3.72007452258966, |
|
"grad_norm": 0.2804830682382998, |
|
"learning_rate": 3.845157949249094e-06, |
|
"loss": 0.175, |
|
"step": 3995 |
|
}, |
|
{ |
|
"epoch": 3.7247321844434094, |
|
"grad_norm": 0.25894069673660725, |
|
"learning_rate": 3.780424650440187e-06, |
|
"loss": 0.179, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 3.729389846297159, |
|
"grad_norm": 0.2770495343014349, |
|
"learning_rate": 3.715691351631279e-06, |
|
"loss": 0.1718, |
|
"step": 4005 |
|
}, |
|
{ |
|
"epoch": 3.7340475081509084, |
|
"grad_norm": 0.27245259036566644, |
|
"learning_rate": 3.650958052822372e-06, |
|
"loss": 0.1745, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 3.738705170004658, |
|
"grad_norm": 0.3178518519661775, |
|
"learning_rate": 3.5862247540134647e-06, |
|
"loss": 0.1718, |
|
"step": 4015 |
|
}, |
|
{ |
|
"epoch": 3.7433628318584073, |
|
"grad_norm": 0.2725475491559217, |
|
"learning_rate": 3.521491455204557e-06, |
|
"loss": 0.1787, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 3.7480204937121564, |
|
"grad_norm": 0.2815887851859046, |
|
"learning_rate": 3.4567581563956503e-06, |
|
"loss": 0.1804, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 3.752678155565906, |
|
"grad_norm": 0.2671335476290762, |
|
"learning_rate": 3.3920248575867424e-06, |
|
"loss": 0.1725, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 3.7573358174196554, |
|
"grad_norm": 0.2884011738724322, |
|
"learning_rate": 3.3272915587778354e-06, |
|
"loss": 0.172, |
|
"step": 4035 |
|
}, |
|
{ |
|
"epoch": 3.761993479273405, |
|
"grad_norm": 0.272777124868328, |
|
"learning_rate": 3.262558259968928e-06, |
|
"loss": 0.1719, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 3.766651141127154, |
|
"grad_norm": 0.27639814202614327, |
|
"learning_rate": 3.197824961160021e-06, |
|
"loss": 0.1755, |
|
"step": 4045 |
|
}, |
|
{ |
|
"epoch": 3.7713088029809034, |
|
"grad_norm": 0.26949037354351396, |
|
"learning_rate": 3.1330916623511135e-06, |
|
"loss": 0.1759, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 3.775966464834653, |
|
"grad_norm": 0.2722470274160135, |
|
"learning_rate": 3.0683583635422065e-06, |
|
"loss": 0.1817, |
|
"step": 4055 |
|
}, |
|
{ |
|
"epoch": 3.7806241266884024, |
|
"grad_norm": 0.2602766776236453, |
|
"learning_rate": 3.003625064733299e-06, |
|
"loss": 0.1715, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 3.785281788542152, |
|
"grad_norm": 0.28711780692476924, |
|
"learning_rate": 2.9388917659243916e-06, |
|
"loss": 0.1711, |
|
"step": 4065 |
|
}, |
|
{ |
|
"epoch": 3.7899394503959014, |
|
"grad_norm": 0.27051801081640436, |
|
"learning_rate": 2.874158467115484e-06, |
|
"loss": 0.1728, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 3.794597112249651, |
|
"grad_norm": 0.2828893708081034, |
|
"learning_rate": 2.8094251683065768e-06, |
|
"loss": 0.171, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 3.7992547741034, |
|
"grad_norm": 0.2723968547135529, |
|
"learning_rate": 2.7446918694976698e-06, |
|
"loss": 0.1723, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 3.8039124359571495, |
|
"grad_norm": 0.26655651659598223, |
|
"learning_rate": 2.6799585706887623e-06, |
|
"loss": 0.1729, |
|
"step": 4085 |
|
}, |
|
{ |
|
"epoch": 3.808570097810899, |
|
"grad_norm": 0.26770981285481243, |
|
"learning_rate": 2.615225271879855e-06, |
|
"loss": 0.1749, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 3.8132277596646484, |
|
"grad_norm": 0.2758365036666948, |
|
"learning_rate": 2.550491973070948e-06, |
|
"loss": 0.1716, |
|
"step": 4095 |
|
}, |
|
{ |
|
"epoch": 3.8178854215183975, |
|
"grad_norm": 0.2610019392205348, |
|
"learning_rate": 2.4857586742620404e-06, |
|
"loss": 0.1718, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 3.822543083372147, |
|
"grad_norm": 0.2685711043716591, |
|
"learning_rate": 2.421025375453133e-06, |
|
"loss": 0.1735, |
|
"step": 4105 |
|
}, |
|
{ |
|
"epoch": 3.8272007452258965, |
|
"grad_norm": 0.27773939842189077, |
|
"learning_rate": 2.356292076644226e-06, |
|
"loss": 0.1729, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 3.831858407079646, |
|
"grad_norm": 0.27818722043603183, |
|
"learning_rate": 2.2915587778353186e-06, |
|
"loss": 0.18, |
|
"step": 4115 |
|
}, |
|
{ |
|
"epoch": 3.8365160689333955, |
|
"grad_norm": 0.29022869128773615, |
|
"learning_rate": 2.2268254790264115e-06, |
|
"loss": 0.1701, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 3.841173730787145, |
|
"grad_norm": 0.26892060504547427, |
|
"learning_rate": 2.162092180217504e-06, |
|
"loss": 0.1748, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 3.8458313926408945, |
|
"grad_norm": 0.2742194085553554, |
|
"learning_rate": 2.0973588814085967e-06, |
|
"loss": 0.1736, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 3.850489054494644, |
|
"grad_norm": 0.280664869183797, |
|
"learning_rate": 2.0326255825996897e-06, |
|
"loss": 0.1742, |
|
"step": 4135 |
|
}, |
|
{ |
|
"epoch": 3.855146716348393, |
|
"grad_norm": 0.2663084540169044, |
|
"learning_rate": 1.9678922837907822e-06, |
|
"loss": 0.1755, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 3.8598043782021425, |
|
"grad_norm": 0.2683512464822063, |
|
"learning_rate": 1.903158984981875e-06, |
|
"loss": 0.1765, |
|
"step": 4145 |
|
}, |
|
{ |
|
"epoch": 3.864462040055892, |
|
"grad_norm": 0.28077450679398896, |
|
"learning_rate": 1.8384256861729676e-06, |
|
"loss": 0.1713, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 3.8691197019096415, |
|
"grad_norm": 0.2629212409957542, |
|
"learning_rate": 1.77369238736406e-06, |
|
"loss": 0.1746, |
|
"step": 4155 |
|
}, |
|
{ |
|
"epoch": 3.8737773637633905, |
|
"grad_norm": 0.2862486384413756, |
|
"learning_rate": 1.7089590885551527e-06, |
|
"loss": 0.1816, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 3.87843502561714, |
|
"grad_norm": 0.2767392538386716, |
|
"learning_rate": 1.6442257897462455e-06, |
|
"loss": 0.1754, |
|
"step": 4165 |
|
}, |
|
{ |
|
"epoch": 3.8830926874708895, |
|
"grad_norm": 0.26967392829121106, |
|
"learning_rate": 1.579492490937338e-06, |
|
"loss": 0.1699, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 3.887750349324639, |
|
"grad_norm": 0.26837525839027015, |
|
"learning_rate": 1.514759192128431e-06, |
|
"loss": 0.177, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 3.8924080111783885, |
|
"grad_norm": 0.2627808696747621, |
|
"learning_rate": 1.4500258933195236e-06, |
|
"loss": 0.1719, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 3.897065673032138, |
|
"grad_norm": 0.2639957998761549, |
|
"learning_rate": 1.3852925945106164e-06, |
|
"loss": 0.1708, |
|
"step": 4185 |
|
}, |
|
{ |
|
"epoch": 3.9017233348858875, |
|
"grad_norm": 0.27125749404584937, |
|
"learning_rate": 1.320559295701709e-06, |
|
"loss": 0.174, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 3.9063809967396366, |
|
"grad_norm": 0.2769883473331165, |
|
"learning_rate": 1.2558259968928017e-06, |
|
"loss": 0.1719, |
|
"step": 4195 |
|
}, |
|
{ |
|
"epoch": 3.911038658593386, |
|
"grad_norm": 0.8395951166898823, |
|
"learning_rate": 1.1910926980838945e-06, |
|
"loss": 0.1696, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 3.9156963204471356, |
|
"grad_norm": 0.2571958789773536, |
|
"learning_rate": 1.126359399274987e-06, |
|
"loss": 0.173, |
|
"step": 4205 |
|
}, |
|
{ |
|
"epoch": 3.920353982300885, |
|
"grad_norm": 0.3193999846164976, |
|
"learning_rate": 1.0616261004660798e-06, |
|
"loss": 0.1719, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 3.925011644154634, |
|
"grad_norm": 0.27512694081708594, |
|
"learning_rate": 9.968928016571726e-07, |
|
"loss": 0.1703, |
|
"step": 4215 |
|
}, |
|
{ |
|
"epoch": 3.9296693060083836, |
|
"grad_norm": 0.27073480012745654, |
|
"learning_rate": 9.321595028482653e-07, |
|
"loss": 0.1701, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 3.934326967862133, |
|
"grad_norm": 0.26389705940762015, |
|
"learning_rate": 8.674262040393578e-07, |
|
"loss": 0.1708, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 3.9389846297158826, |
|
"grad_norm": 0.2575701459397594, |
|
"learning_rate": 8.026929052304505e-07, |
|
"loss": 0.1747, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 3.943642291569632, |
|
"grad_norm": 0.27561192360509723, |
|
"learning_rate": 7.379596064215433e-07, |
|
"loss": 0.1705, |
|
"step": 4235 |
|
}, |
|
{ |
|
"epoch": 3.9482999534233816, |
|
"grad_norm": 0.256804333426344, |
|
"learning_rate": 6.73226307612636e-07, |
|
"loss": 0.1719, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 3.952957615277131, |
|
"grad_norm": 0.25955876150091883, |
|
"learning_rate": 6.084930088037286e-07, |
|
"loss": 0.1714, |
|
"step": 4245 |
|
}, |
|
{ |
|
"epoch": 3.9576152771308806, |
|
"grad_norm": 0.26478526146557524, |
|
"learning_rate": 5.437597099948214e-07, |
|
"loss": 0.1713, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 3.9622729389846296, |
|
"grad_norm": 0.27991810492833324, |
|
"learning_rate": 4.790264111859141e-07, |
|
"loss": 0.176, |
|
"step": 4255 |
|
}, |
|
{ |
|
"epoch": 3.966930600838379, |
|
"grad_norm": 0.2665648771975109, |
|
"learning_rate": 4.1429311237700674e-07, |
|
"loss": 0.1718, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 3.9715882626921286, |
|
"grad_norm": 0.2644112154207203, |
|
"learning_rate": 3.495598135680994e-07, |
|
"loss": 0.1756, |
|
"step": 4265 |
|
}, |
|
{ |
|
"epoch": 3.976245924545878, |
|
"grad_norm": 0.26058860902480596, |
|
"learning_rate": 2.8482651475919213e-07, |
|
"loss": 0.1725, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 3.980903586399627, |
|
"grad_norm": 0.27155646802982203, |
|
"learning_rate": 2.200932159502848e-07, |
|
"loss": 0.1751, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 3.9855612482533767, |
|
"grad_norm": 0.279201457240123, |
|
"learning_rate": 1.5535991714137753e-07, |
|
"loss": 0.1732, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 3.990218910107126, |
|
"grad_norm": 0.25945923118786696, |
|
"learning_rate": 9.062661833247022e-08, |
|
"loss": 0.1738, |
|
"step": 4285 |
|
}, |
|
{ |
|
"epoch": 3.9948765719608756, |
|
"grad_norm": 0.2669644915648517, |
|
"learning_rate": 2.589331952356292e-08, |
|
"loss": 0.1721, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 3.9967396367023755, |
|
"step": 4292, |
|
"total_flos": 3.675734279595229e+18, |
|
"train_loss": 0.31530814574126986, |
|
"train_runtime": 166213.7776, |
|
"train_samples_per_second": 0.413, |
|
"train_steps_per_second": 0.026 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 4292, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.675734279595229e+18, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|