|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.8553299492385786, |
|
"eval_steps": 500, |
|
"global_step": 4500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.006345177664974619, |
|
"grad_norm": 12.304139137268066, |
|
"learning_rate": 1.9027484143763215e-07, |
|
"loss": 1.4092, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.012690355329949238, |
|
"grad_norm": 10.735240936279297, |
|
"learning_rate": 4.0169133192389007e-07, |
|
"loss": 1.3444, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01903553299492386, |
|
"grad_norm": 4.4380784034729, |
|
"learning_rate": 6.131078224101481e-07, |
|
"loss": 1.2567, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.025380710659898477, |
|
"grad_norm": 3.0971062183380127, |
|
"learning_rate": 8.245243128964061e-07, |
|
"loss": 1.2201, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.031725888324873094, |
|
"grad_norm": 2.3528785705566406, |
|
"learning_rate": 1.0359408033826639e-06, |
|
"loss": 1.1005, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.03807106598984772, |
|
"grad_norm": 1.9325449466705322, |
|
"learning_rate": 1.2473572938689219e-06, |
|
"loss": 1.0258, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.044416243654822336, |
|
"grad_norm": 1.9481005668640137, |
|
"learning_rate": 1.4587737843551796e-06, |
|
"loss": 0.9549, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.050761421319796954, |
|
"grad_norm": 1.3744746446609497, |
|
"learning_rate": 1.6701902748414379e-06, |
|
"loss": 0.9397, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.05710659898477157, |
|
"grad_norm": 1.3208822011947632, |
|
"learning_rate": 1.8816067653276956e-06, |
|
"loss": 0.9581, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.06345177664974619, |
|
"grad_norm": 1.578454613685608, |
|
"learning_rate": 2.0930232558139536e-06, |
|
"loss": 0.8835, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.06979695431472081, |
|
"grad_norm": 1.7314599752426147, |
|
"learning_rate": 2.3044397463002116e-06, |
|
"loss": 0.877, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.07614213197969544, |
|
"grad_norm": 1.690652847290039, |
|
"learning_rate": 2.5158562367864696e-06, |
|
"loss": 0.8674, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.08248730964467005, |
|
"grad_norm": 1.4886319637298584, |
|
"learning_rate": 2.7272727272727272e-06, |
|
"loss": 0.8124, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.08883248730964467, |
|
"grad_norm": 1.5932313203811646, |
|
"learning_rate": 2.9386892177589852e-06, |
|
"loss": 0.8825, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.09517766497461928, |
|
"grad_norm": 1.7353770732879639, |
|
"learning_rate": 3.1501057082452436e-06, |
|
"loss": 0.8381, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.10152284263959391, |
|
"grad_norm": 1.5052095651626587, |
|
"learning_rate": 3.3615221987315012e-06, |
|
"loss": 0.8094, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.10786802030456853, |
|
"grad_norm": 1.5068026781082153, |
|
"learning_rate": 3.5729386892177592e-06, |
|
"loss": 0.8088, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.11421319796954314, |
|
"grad_norm": 1.3972314596176147, |
|
"learning_rate": 3.7843551797040172e-06, |
|
"loss": 0.7807, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.12055837563451777, |
|
"grad_norm": 1.4561253786087036, |
|
"learning_rate": 3.995771670190275e-06, |
|
"loss": 0.751, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.12690355329949238, |
|
"grad_norm": 1.1900990009307861, |
|
"learning_rate": 4.207188160676533e-06, |
|
"loss": 0.7526, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.13324873096446702, |
|
"grad_norm": 1.2069578170776367, |
|
"learning_rate": 4.418604651162791e-06, |
|
"loss": 0.737, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.13959390862944163, |
|
"grad_norm": 1.3006811141967773, |
|
"learning_rate": 4.630021141649049e-06, |
|
"loss": 0.757, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.14593908629441624, |
|
"grad_norm": 1.1366584300994873, |
|
"learning_rate": 4.841437632135307e-06, |
|
"loss": 0.7355, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.15228426395939088, |
|
"grad_norm": 1.0923043489456177, |
|
"learning_rate": 5.052854122621564e-06, |
|
"loss": 0.7273, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.15862944162436549, |
|
"grad_norm": 1.1340067386627197, |
|
"learning_rate": 5.264270613107823e-06, |
|
"loss": 0.7093, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.1649746192893401, |
|
"grad_norm": 1.0045281648635864, |
|
"learning_rate": 5.47568710359408e-06, |
|
"loss": 0.709, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.1713197969543147, |
|
"grad_norm": 1.3080400228500366, |
|
"learning_rate": 5.687103594080339e-06, |
|
"loss": 0.7142, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.17766497461928935, |
|
"grad_norm": 1.4830659627914429, |
|
"learning_rate": 5.898520084566597e-06, |
|
"loss": 0.7233, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.18401015228426396, |
|
"grad_norm": 1.295798897743225, |
|
"learning_rate": 6.109936575052855e-06, |
|
"loss": 0.7254, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.19035532994923857, |
|
"grad_norm": 1.1951725482940674, |
|
"learning_rate": 6.321353065539113e-06, |
|
"loss": 0.7008, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.1967005076142132, |
|
"grad_norm": 1.1962999105453491, |
|
"learning_rate": 6.53276955602537e-06, |
|
"loss": 0.6697, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.20304568527918782, |
|
"grad_norm": 1.0768781900405884, |
|
"learning_rate": 6.744186046511628e-06, |
|
"loss": 0.6688, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.20939086294416243, |
|
"grad_norm": 1.2655526399612427, |
|
"learning_rate": 6.955602536997886e-06, |
|
"loss": 0.7098, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.21573604060913706, |
|
"grad_norm": 1.1732734441757202, |
|
"learning_rate": 7.167019027484144e-06, |
|
"loss": 0.6961, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.22208121827411167, |
|
"grad_norm": 1.4146960973739624, |
|
"learning_rate": 7.378435517970403e-06, |
|
"loss": 0.6581, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.22842639593908629, |
|
"grad_norm": 1.0180368423461914, |
|
"learning_rate": 7.58985200845666e-06, |
|
"loss": 0.636, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.23477157360406092, |
|
"grad_norm": 1.1763561964035034, |
|
"learning_rate": 7.801268498942918e-06, |
|
"loss": 0.6695, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.24111675126903553, |
|
"grad_norm": 1.120521068572998, |
|
"learning_rate": 8.012684989429176e-06, |
|
"loss": 0.6658, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.24746192893401014, |
|
"grad_norm": 1.070609450340271, |
|
"learning_rate": 8.224101479915433e-06, |
|
"loss": 0.6528, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.25380710659898476, |
|
"grad_norm": 1.404994249343872, |
|
"learning_rate": 8.435517970401692e-06, |
|
"loss": 0.6525, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.26015228426395937, |
|
"grad_norm": 1.3568419218063354, |
|
"learning_rate": 8.64693446088795e-06, |
|
"loss": 0.6525, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.26649746192893403, |
|
"grad_norm": 1.3468185663223267, |
|
"learning_rate": 8.858350951374208e-06, |
|
"loss": 0.641, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.27284263959390864, |
|
"grad_norm": 1.0951420068740845, |
|
"learning_rate": 9.069767441860465e-06, |
|
"loss": 0.6453, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.27918781725888325, |
|
"grad_norm": 1.030259370803833, |
|
"learning_rate": 9.281183932346723e-06, |
|
"loss": 0.6138, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.28553299492385786, |
|
"grad_norm": 1.1757938861846924, |
|
"learning_rate": 9.492600422832982e-06, |
|
"loss": 0.6787, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.2918781725888325, |
|
"grad_norm": 1.3138433694839478, |
|
"learning_rate": 9.70401691331924e-06, |
|
"loss": 0.6633, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.2982233502538071, |
|
"grad_norm": 1.3092707395553589, |
|
"learning_rate": 9.915433403805497e-06, |
|
"loss": 0.6432, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.30456852791878175, |
|
"grad_norm": 1.2927078008651733, |
|
"learning_rate": 9.999950938319974e-06, |
|
"loss": 0.6266, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.31091370558375636, |
|
"grad_norm": 1.33150053024292, |
|
"learning_rate": 9.999651120428776e-06, |
|
"loss": 0.6427, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.31725888324873097, |
|
"grad_norm": 1.2657496929168701, |
|
"learning_rate": 9.999078757459388e-06, |
|
"loss": 0.6457, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.3236040609137056, |
|
"grad_norm": 1.6883960962295532, |
|
"learning_rate": 9.998233880612932e-06, |
|
"loss": 0.6137, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.3299492385786802, |
|
"grad_norm": 0.9815077781677246, |
|
"learning_rate": 9.997116535946028e-06, |
|
"loss": 0.6069, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.3362944162436548, |
|
"grad_norm": 1.3186026811599731, |
|
"learning_rate": 9.99572678436828e-06, |
|
"loss": 0.6024, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.3426395939086294, |
|
"grad_norm": 1.6290111541748047, |
|
"learning_rate": 9.994064701638969e-06, |
|
"loss": 0.6273, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.3489847715736041, |
|
"grad_norm": 1.3211804628372192, |
|
"learning_rate": 9.992130378362908e-06, |
|
"loss": 0.6068, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.3553299492385787, |
|
"grad_norm": 1.619232177734375, |
|
"learning_rate": 9.989923919985512e-06, |
|
"loss": 0.612, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.3616751269035533, |
|
"grad_norm": 1.0001276731491089, |
|
"learning_rate": 9.987445446787049e-06, |
|
"loss": 0.5687, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.3680203045685279, |
|
"grad_norm": 1.2668827772140503, |
|
"learning_rate": 9.984695093876081e-06, |
|
"loss": 0.5723, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.3743654822335025, |
|
"grad_norm": 1.1758859157562256, |
|
"learning_rate": 9.981673011182098e-06, |
|
"loss": 0.5963, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.38071065989847713, |
|
"grad_norm": 1.4700498580932617, |
|
"learning_rate": 9.978379363447348e-06, |
|
"loss": 0.5682, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.3870558375634518, |
|
"grad_norm": 1.7378568649291992, |
|
"learning_rate": 9.974814330217858e-06, |
|
"loss": 0.6286, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.3934010152284264, |
|
"grad_norm": 1.5732265710830688, |
|
"learning_rate": 9.970978105833632e-06, |
|
"loss": 0.5464, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.399746192893401, |
|
"grad_norm": 1.4477766752243042, |
|
"learning_rate": 9.966870899418087e-06, |
|
"loss": 0.5806, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.40609137055837563, |
|
"grad_norm": 1.5664384365081787, |
|
"learning_rate": 9.96249293486662e-06, |
|
"loss": 0.5868, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.41243654822335024, |
|
"grad_norm": 1.242577075958252, |
|
"learning_rate": 9.957844450834418e-06, |
|
"loss": 0.5943, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.41878172588832485, |
|
"grad_norm": 1.3932079076766968, |
|
"learning_rate": 9.952925700723455e-06, |
|
"loss": 0.5582, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.4251269035532995, |
|
"grad_norm": 1.4832308292388916, |
|
"learning_rate": 9.947736952668667e-06, |
|
"loss": 0.561, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.43147208121827413, |
|
"grad_norm": 1.8345366716384888, |
|
"learning_rate": 9.942278489523338e-06, |
|
"loss": 0.5459, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.43781725888324874, |
|
"grad_norm": 1.1875063180923462, |
|
"learning_rate": 9.936550608843685e-06, |
|
"loss": 0.5267, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.44416243654822335, |
|
"grad_norm": 1.4732545614242554, |
|
"learning_rate": 9.930553622872631e-06, |
|
"loss": 0.5814, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.45050761421319796, |
|
"grad_norm": 1.7493573427200317, |
|
"learning_rate": 9.924287858522789e-06, |
|
"loss": 0.5633, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.45685279187817257, |
|
"grad_norm": 1.4842727184295654, |
|
"learning_rate": 9.917753657358638e-06, |
|
"loss": 0.53, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.4631979695431472, |
|
"grad_norm": 1.6605039834976196, |
|
"learning_rate": 9.910951375577907e-06, |
|
"loss": 0.5231, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.46954314720812185, |
|
"grad_norm": 1.6541188955307007, |
|
"learning_rate": 9.903881383992153e-06, |
|
"loss": 0.5268, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.47588832487309646, |
|
"grad_norm": 1.8268778324127197, |
|
"learning_rate": 9.89654406800655e-06, |
|
"loss": 0.49, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.48223350253807107, |
|
"grad_norm": 1.4834731817245483, |
|
"learning_rate": 9.88893982759888e-06, |
|
"loss": 0.5045, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.4885786802030457, |
|
"grad_norm": 1.717140555381775, |
|
"learning_rate": 9.881069077297724e-06, |
|
"loss": 0.496, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.4949238578680203, |
|
"grad_norm": 1.0741287469863892, |
|
"learning_rate": 9.872932246159873e-06, |
|
"loss": 0.4679, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.501269035532995, |
|
"grad_norm": 1.2269752025604248, |
|
"learning_rate": 9.864529777746929e-06, |
|
"loss": 0.4772, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.5076142131979695, |
|
"grad_norm": 1.6613504886627197, |
|
"learning_rate": 9.85586213010114e-06, |
|
"loss": 0.5008, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.5139593908629442, |
|
"grad_norm": 1.2009035348892212, |
|
"learning_rate": 9.846929775720411e-06, |
|
"loss": 0.5038, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.5203045685279187, |
|
"grad_norm": 1.5814530849456787, |
|
"learning_rate": 9.837733201532565e-06, |
|
"loss": 0.5021, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.5266497461928934, |
|
"grad_norm": 1.6952024698257446, |
|
"learning_rate": 9.82827290886879e-06, |
|
"loss": 0.4845, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.5329949238578681, |
|
"grad_norm": 1.3526102304458618, |
|
"learning_rate": 9.818549413436309e-06, |
|
"loss": 0.4952, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.5393401015228426, |
|
"grad_norm": 1.7655881643295288, |
|
"learning_rate": 9.80856324529027e-06, |
|
"loss": 0.4678, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.5456852791878173, |
|
"grad_norm": 1.391158103942871, |
|
"learning_rate": 9.79831494880486e-06, |
|
"loss": 0.4702, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.5520304568527918, |
|
"grad_norm": 1.3191405534744263, |
|
"learning_rate": 9.787805082643604e-06, |
|
"loss": 0.4394, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.5583756345177665, |
|
"grad_norm": 1.537750005722046, |
|
"learning_rate": 9.777034219728943e-06, |
|
"loss": 0.4172, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.5647208121827412, |
|
"grad_norm": 1.953177809715271, |
|
"learning_rate": 9.76600294721098e-06, |
|
"loss": 0.4846, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.5710659898477157, |
|
"grad_norm": 1.3089863061904907, |
|
"learning_rate": 9.754711866435477e-06, |
|
"loss": 0.414, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.5774111675126904, |
|
"grad_norm": 1.6026610136032104, |
|
"learning_rate": 9.743161592911088e-06, |
|
"loss": 0.5243, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.583756345177665, |
|
"grad_norm": 1.7620460987091064, |
|
"learning_rate": 9.731352756275781e-06, |
|
"loss": 0.4181, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.5901015228426396, |
|
"grad_norm": 1.6068378686904907, |
|
"learning_rate": 9.719286000262533e-06, |
|
"loss": 0.3713, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.5964467005076142, |
|
"grad_norm": 2.3091704845428467, |
|
"learning_rate": 9.706961982664239e-06, |
|
"loss": 0.4562, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.6027918781725888, |
|
"grad_norm": 2.353106737136841, |
|
"learning_rate": 9.69438137529784e-06, |
|
"loss": 0.4361, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.6091370558375635, |
|
"grad_norm": 1.599411129951477, |
|
"learning_rate": 9.681544863967713e-06, |
|
"loss": 0.4496, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.6154822335025381, |
|
"grad_norm": 1.5869901180267334, |
|
"learning_rate": 9.668453148428282e-06, |
|
"loss": 0.4046, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.6218274111675127, |
|
"grad_norm": 1.7548712491989136, |
|
"learning_rate": 9.65510694234587e-06, |
|
"loss": 0.3627, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.6281725888324873, |
|
"grad_norm": 1.3313032388687134, |
|
"learning_rate": 9.641506973259798e-06, |
|
"loss": 0.4176, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.6345177664974619, |
|
"grad_norm": 3.056716203689575, |
|
"learning_rate": 9.627653982542722e-06, |
|
"loss": 0.4283, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.6408629441624365, |
|
"grad_norm": 1.8358234167099, |
|
"learning_rate": 9.613548725360224e-06, |
|
"loss": 0.4217, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.6472081218274112, |
|
"grad_norm": 1.823522686958313, |
|
"learning_rate": 9.599191970629638e-06, |
|
"loss": 0.437, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.6535532994923858, |
|
"grad_norm": 1.779383897781372, |
|
"learning_rate": 9.584584500978144e-06, |
|
"loss": 0.3995, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.6598984771573604, |
|
"grad_norm": 1.7531787157058716, |
|
"learning_rate": 9.569727112700093e-06, |
|
"loss": 0.4449, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.666243654822335, |
|
"grad_norm": 2.1453044414520264, |
|
"learning_rate": 9.55462061571361e-06, |
|
"loss": 0.3754, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.6725888324873096, |
|
"grad_norm": 1.6521024703979492, |
|
"learning_rate": 9.539265833516434e-06, |
|
"loss": 0.419, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.6789340101522843, |
|
"grad_norm": 1.616896152496338, |
|
"learning_rate": 9.523663603141032e-06, |
|
"loss": 0.4076, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.6852791878172588, |
|
"grad_norm": 1.219354510307312, |
|
"learning_rate": 9.507814775108971e-06, |
|
"loss": 0.4092, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.6916243654822335, |
|
"grad_norm": 22.454200744628906, |
|
"learning_rate": 9.49172021338455e-06, |
|
"loss": 0.4034, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.6979695431472082, |
|
"grad_norm": 1.8505566120147705, |
|
"learning_rate": 9.475380795327702e-06, |
|
"loss": 0.3824, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.7043147208121827, |
|
"grad_norm": 1.492254376411438, |
|
"learning_rate": 9.458797411646176e-06, |
|
"loss": 0.3405, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.7106598984771574, |
|
"grad_norm": 1.774132251739502, |
|
"learning_rate": 9.441970966346965e-06, |
|
"loss": 0.3425, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.7170050761421319, |
|
"grad_norm": 1.2463436126708984, |
|
"learning_rate": 9.424902376687045e-06, |
|
"loss": 0.3594, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.7233502538071066, |
|
"grad_norm": 1.515215277671814, |
|
"learning_rate": 9.407592573123359e-06, |
|
"loss": 0.359, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.7296954314720813, |
|
"grad_norm": 3.103351593017578, |
|
"learning_rate": 9.390042499262102e-06, |
|
"loss": 0.3554, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.7360406091370558, |
|
"grad_norm": 1.8471239805221558, |
|
"learning_rate": 9.372253111807276e-06, |
|
"loss": 0.3251, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.7423857868020305, |
|
"grad_norm": 1.8411760330200195, |
|
"learning_rate": 9.354225380508548e-06, |
|
"loss": 0.3233, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.748730964467005, |
|
"grad_norm": 1.499944806098938, |
|
"learning_rate": 9.33596028810838e-06, |
|
"loss": 0.3718, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.7550761421319797, |
|
"grad_norm": 2.158557653427124, |
|
"learning_rate": 9.317458830288446e-06, |
|
"loss": 0.3463, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.7614213197969543, |
|
"grad_norm": 1.5045950412750244, |
|
"learning_rate": 9.29872201561538e-06, |
|
"loss": 0.3682, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.7677664974619289, |
|
"grad_norm": 1.9903945922851562, |
|
"learning_rate": 9.279750865485772e-06, |
|
"loss": 0.3149, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.7741116751269036, |
|
"grad_norm": 1.7139513492584229, |
|
"learning_rate": 9.260546414070504e-06, |
|
"loss": 0.2947, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.7804568527918782, |
|
"grad_norm": 2.4074273109436035, |
|
"learning_rate": 9.241109708258362e-06, |
|
"loss": 0.3451, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.7868020304568528, |
|
"grad_norm": 1.736325740814209, |
|
"learning_rate": 9.221441807598981e-06, |
|
"loss": 0.3156, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.7931472081218274, |
|
"grad_norm": 1.722331166267395, |
|
"learning_rate": 9.201543784245076e-06, |
|
"loss": 0.2895, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.799492385786802, |
|
"grad_norm": 1.800851583480835, |
|
"learning_rate": 9.181416722893998e-06, |
|
"loss": 0.2907, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.8058375634517766, |
|
"grad_norm": 2.2214279174804688, |
|
"learning_rate": 9.161061720728606e-06, |
|
"loss": 0.3074, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.8121827411167513, |
|
"grad_norm": 1.5840632915496826, |
|
"learning_rate": 9.140479887357454e-06, |
|
"loss": 0.2684, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.8185279187817259, |
|
"grad_norm": 2.0567562580108643, |
|
"learning_rate": 9.119672344754307e-06, |
|
"loss": 0.2777, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.8248730964467005, |
|
"grad_norm": 2.080697774887085, |
|
"learning_rate": 9.098640227196978e-06, |
|
"loss": 0.294, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.8312182741116751, |
|
"grad_norm": 2.2059218883514404, |
|
"learning_rate": 9.077384681205487e-06, |
|
"loss": 0.3483, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.8375634517766497, |
|
"grad_norm": 1.5565263032913208, |
|
"learning_rate": 9.055906865479574e-06, |
|
"loss": 0.2744, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.8439086294416244, |
|
"grad_norm": 1.5794973373413086, |
|
"learning_rate": 9.034207950835527e-06, |
|
"loss": 0.2803, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.850253807106599, |
|
"grad_norm": 1.8375296592712402, |
|
"learning_rate": 9.01228912014236e-06, |
|
"loss": 0.2805, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.8565989847715736, |
|
"grad_norm": 1.5420727729797363, |
|
"learning_rate": 8.99015156825733e-06, |
|
"loss": 0.2774, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.8629441624365483, |
|
"grad_norm": 1.6844383478164673, |
|
"learning_rate": 8.967796501960805e-06, |
|
"loss": 0.2724, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.8692893401015228, |
|
"grad_norm": 2.27237606048584, |
|
"learning_rate": 8.945225139890468e-06, |
|
"loss": 0.2514, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.8756345177664975, |
|
"grad_norm": 1.6022717952728271, |
|
"learning_rate": 8.92243871247491e-06, |
|
"loss": 0.2675, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.881979695431472, |
|
"grad_norm": 1.3979642391204834, |
|
"learning_rate": 8.899438461866526e-06, |
|
"loss": 0.2404, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.8883248730964467, |
|
"grad_norm": 1.8629894256591797, |
|
"learning_rate": 8.876225641873822e-06, |
|
"loss": 0.2744, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.8946700507614214, |
|
"grad_norm": 1.6122556924819946, |
|
"learning_rate": 8.852801517893063e-06, |
|
"loss": 0.2814, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.9010152284263959, |
|
"grad_norm": 2.0331978797912598, |
|
"learning_rate": 8.829167366839287e-06, |
|
"loss": 0.2728, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.9073604060913706, |
|
"grad_norm": 1.5905483961105347, |
|
"learning_rate": 8.805324477076697e-06, |
|
"loss": 0.2503, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.9137055837563451, |
|
"grad_norm": 1.9675116539001465, |
|
"learning_rate": 8.781274148348438e-06, |
|
"loss": 0.2241, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.9200507614213198, |
|
"grad_norm": 1.981604814529419, |
|
"learning_rate": 8.757017691705732e-06, |
|
"loss": 0.2789, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.9263959390862944, |
|
"grad_norm": 1.6477928161621094, |
|
"learning_rate": 8.732556429436419e-06, |
|
"loss": 0.2442, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.932741116751269, |
|
"grad_norm": 1.875747799873352, |
|
"learning_rate": 8.70789169499287e-06, |
|
"loss": 0.2372, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.9390862944162437, |
|
"grad_norm": 1.9763504266738892, |
|
"learning_rate": 8.683024832919295e-06, |
|
"loss": 0.2493, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.9454314720812182, |
|
"grad_norm": 2.166445016860962, |
|
"learning_rate": 8.657957198778455e-06, |
|
"loss": 0.2491, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.9517766497461929, |
|
"grad_norm": 2.062021493911743, |
|
"learning_rate": 8.632690159077758e-06, |
|
"loss": 0.2611, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.9581218274111675, |
|
"grad_norm": 1.5676127672195435, |
|
"learning_rate": 8.60722509119478e-06, |
|
"loss": 0.2475, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.9644670050761421, |
|
"grad_norm": 1.734596610069275, |
|
"learning_rate": 8.581563383302158e-06, |
|
"loss": 0.2499, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.9708121827411168, |
|
"grad_norm": 2.276888132095337, |
|
"learning_rate": 8.555706434291944e-06, |
|
"loss": 0.2052, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.9771573604060914, |
|
"grad_norm": 1.5414533615112305, |
|
"learning_rate": 8.529655653699323e-06, |
|
"loss": 0.2008, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.983502538071066, |
|
"grad_norm": 2.0116498470306396, |
|
"learning_rate": 8.503412461625792e-06, |
|
"loss": 0.2088, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.9898477157360406, |
|
"grad_norm": 2.507782220840454, |
|
"learning_rate": 8.47697828866174e-06, |
|
"loss": 0.2212, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.9961928934010152, |
|
"grad_norm": 1.5416207313537598, |
|
"learning_rate": 8.450354575808463e-06, |
|
"loss": 0.227, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.00253807106599, |
|
"grad_norm": 1.7348345518112183, |
|
"learning_rate": 8.423542774399606e-06, |
|
"loss": 0.2192, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.0088832487309645, |
|
"grad_norm": 1.8863823413848877, |
|
"learning_rate": 8.396544346022055e-06, |
|
"loss": 0.159, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.015228426395939, |
|
"grad_norm": 1.3554282188415527, |
|
"learning_rate": 8.36936076243626e-06, |
|
"loss": 0.1519, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.0215736040609138, |
|
"grad_norm": 1.915385127067566, |
|
"learning_rate": 8.341993505496e-06, |
|
"loss": 0.1667, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 1.0279187817258884, |
|
"grad_norm": 2.683910369873047, |
|
"learning_rate": 8.314444067067611e-06, |
|
"loss": 0.1672, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.034263959390863, |
|
"grad_norm": 3.2767446041107178, |
|
"learning_rate": 8.286713948948646e-06, |
|
"loss": 0.151, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 1.0406091370558375, |
|
"grad_norm": 1.7172635793685913, |
|
"learning_rate": 8.258804662786031e-06, |
|
"loss": 0.1365, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.0469543147208122, |
|
"grad_norm": 1.9492729902267456, |
|
"learning_rate": 8.230717729993637e-06, |
|
"loss": 0.1521, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.0532994923857868, |
|
"grad_norm": 1.3974714279174805, |
|
"learning_rate": 8.202454681669352e-06, |
|
"loss": 0.1784, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 1.0596446700507614, |
|
"grad_norm": 1.5528488159179688, |
|
"learning_rate": 8.17401705851163e-06, |
|
"loss": 0.145, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 1.0659898477157361, |
|
"grad_norm": 4.622862815856934, |
|
"learning_rate": 8.14540641073548e-06, |
|
"loss": 0.149, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 1.0723350253807107, |
|
"grad_norm": 1.4450290203094482, |
|
"learning_rate": 8.116624297987973e-06, |
|
"loss": 0.1354, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 1.0786802030456852, |
|
"grad_norm": 1.5473392009735107, |
|
"learning_rate": 8.087672289263228e-06, |
|
"loss": 0.1355, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.0850253807106598, |
|
"grad_norm": 1.55717134475708, |
|
"learning_rate": 8.058551962816858e-06, |
|
"loss": 0.1533, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.0913705583756346, |
|
"grad_norm": 2.583096742630005, |
|
"learning_rate": 8.029264906079962e-06, |
|
"loss": 0.1498, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 1.0977157360406091, |
|
"grad_norm": 3.534912109375, |
|
"learning_rate": 7.99981271557257e-06, |
|
"loss": 0.1653, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 1.1040609137055837, |
|
"grad_norm": 1.350325345993042, |
|
"learning_rate": 7.970196996816622e-06, |
|
"loss": 0.1253, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 1.1104060913705585, |
|
"grad_norm": 1.4373643398284912, |
|
"learning_rate": 7.940419364248445e-06, |
|
"loss": 0.1681, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.116751269035533, |
|
"grad_norm": 2.416491985321045, |
|
"learning_rate": 7.910481441130739e-06, |
|
"loss": 0.1382, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 1.1230964467005076, |
|
"grad_norm": 1.4168888330459595, |
|
"learning_rate": 7.880384859464102e-06, |
|
"loss": 0.1286, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 1.1294416243654823, |
|
"grad_norm": 1.4525187015533447, |
|
"learning_rate": 7.850131259898051e-06, |
|
"loss": 0.1454, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 1.135786802030457, |
|
"grad_norm": 2.431896448135376, |
|
"learning_rate": 7.819722291641591e-06, |
|
"loss": 0.159, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 1.1421319796954315, |
|
"grad_norm": 1.982692837715149, |
|
"learning_rate": 7.789159612373317e-06, |
|
"loss": 0.1201, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.148477157360406, |
|
"grad_norm": 1.786580204963684, |
|
"learning_rate": 7.758444888151042e-06, |
|
"loss": 0.1274, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 1.1548223350253808, |
|
"grad_norm": 1.0583122968673706, |
|
"learning_rate": 7.727579793320977e-06, |
|
"loss": 0.1246, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 1.1611675126903553, |
|
"grad_norm": 1.2649511098861694, |
|
"learning_rate": 7.69656601042646e-06, |
|
"loss": 0.1296, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 1.16751269035533, |
|
"grad_norm": 1.5088468790054321, |
|
"learning_rate": 7.665405230116232e-06, |
|
"loss": 0.1549, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 1.1738578680203045, |
|
"grad_norm": 1.6474385261535645, |
|
"learning_rate": 7.634099151052283e-06, |
|
"loss": 0.1114, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.1802030456852792, |
|
"grad_norm": 1.665197730064392, |
|
"learning_rate": 7.602649479817242e-06, |
|
"loss": 0.119, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.1865482233502538, |
|
"grad_norm": 1.6402256488800049, |
|
"learning_rate": 7.5710579308213576e-06, |
|
"loss": 0.105, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 1.1928934010152283, |
|
"grad_norm": 1.4458770751953125, |
|
"learning_rate": 7.539326226209032e-06, |
|
"loss": 0.1574, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 1.1992385786802031, |
|
"grad_norm": 1.4857584238052368, |
|
"learning_rate": 7.507456095764942e-06, |
|
"loss": 0.1265, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 1.2055837563451777, |
|
"grad_norm": 1.7672957181930542, |
|
"learning_rate": 7.475449276819753e-06, |
|
"loss": 0.1152, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.2119289340101522, |
|
"grad_norm": 1.756518006324768, |
|
"learning_rate": 7.443307514155402e-06, |
|
"loss": 0.1051, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.218274111675127, |
|
"grad_norm": 2.3999290466308594, |
|
"learning_rate": 7.411032559909991e-06, |
|
"loss": 0.1249, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.2246192893401016, |
|
"grad_norm": 2.726649522781372, |
|
"learning_rate": 7.378626173482268e-06, |
|
"loss": 0.1065, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 1.2309644670050761, |
|
"grad_norm": 1.4104615449905396, |
|
"learning_rate": 7.346090121435724e-06, |
|
"loss": 0.0982, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 1.2373096446700507, |
|
"grad_norm": 1.8831905126571655, |
|
"learning_rate": 7.313426177402281e-06, |
|
"loss": 0.1091, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.2436548223350254, |
|
"grad_norm": 2.125528573989868, |
|
"learning_rate": 7.2806361219856205e-06, |
|
"loss": 0.1197, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 1.8320462703704834, |
|
"learning_rate": 7.24772174266411e-06, |
|
"loss": 0.0979, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 1.2563451776649746, |
|
"grad_norm": 1.6644319295883179, |
|
"learning_rate": 7.214684833693362e-06, |
|
"loss": 0.1451, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 1.262690355329949, |
|
"grad_norm": 1.816611886024475, |
|
"learning_rate": 7.181527196008424e-06, |
|
"loss": 0.1111, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 1.2690355329949239, |
|
"grad_norm": 2.8035154342651367, |
|
"learning_rate": 7.148250637125611e-06, |
|
"loss": 0.0894, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.2753807106598984, |
|
"grad_norm": 1.8045902252197266, |
|
"learning_rate": 7.114856971043963e-06, |
|
"loss": 0.0931, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 1.281725888324873, |
|
"grad_norm": 1.637097716331482, |
|
"learning_rate": 7.081348018146367e-06, |
|
"loss": 0.1572, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 1.2880710659898478, |
|
"grad_norm": 1.4267776012420654, |
|
"learning_rate": 7.047725605100317e-06, |
|
"loss": 0.1071, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 1.2944162436548223, |
|
"grad_norm": 2.571660280227661, |
|
"learning_rate": 7.01399156475834e-06, |
|
"loss": 0.1158, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 1.3007614213197969, |
|
"grad_norm": 2.324598789215088, |
|
"learning_rate": 6.980147736058083e-06, |
|
"loss": 0.0959, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.3071065989847717, |
|
"grad_norm": 1.4909052848815918, |
|
"learning_rate": 6.946195963922064e-06, |
|
"loss": 0.1202, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 1.3134517766497462, |
|
"grad_norm": 1.6092907190322876, |
|
"learning_rate": 6.9121380991571065e-06, |
|
"loss": 0.0805, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 1.3197969543147208, |
|
"grad_norm": 1.2184277772903442, |
|
"learning_rate": 6.877975998353433e-06, |
|
"loss": 0.1132, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 1.3261421319796955, |
|
"grad_norm": 1.2614070177078247, |
|
"learning_rate": 6.8437115237834765e-06, |
|
"loss": 0.089, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 1.33248730964467, |
|
"grad_norm": 1.7008192539215088, |
|
"learning_rate": 6.809346543300346e-06, |
|
"loss": 0.0787, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.3388324873096447, |
|
"grad_norm": 1.3894529342651367, |
|
"learning_rate": 6.774882930236015e-06, |
|
"loss": 0.0962, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 1.3451776649746192, |
|
"grad_norm": 1.7126891613006592, |
|
"learning_rate": 6.740322563299195e-06, |
|
"loss": 0.0952, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 1.351522842639594, |
|
"grad_norm": 1.7561262845993042, |
|
"learning_rate": 6.705667326472926e-06, |
|
"loss": 0.0989, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 1.3578680203045685, |
|
"grad_norm": 1.4162139892578125, |
|
"learning_rate": 6.6709191089118685e-06, |
|
"loss": 0.1046, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 1.364213197969543, |
|
"grad_norm": 1.8884022235870361, |
|
"learning_rate": 6.636079804839329e-06, |
|
"loss": 0.0847, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.3705583756345177, |
|
"grad_norm": 1.4617987871170044, |
|
"learning_rate": 6.601151313443997e-06, |
|
"loss": 0.0858, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 1.3769035532994924, |
|
"grad_norm": 1.5476235151290894, |
|
"learning_rate": 6.566135538776413e-06, |
|
"loss": 0.0907, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 1.383248730964467, |
|
"grad_norm": 1.8879975080490112, |
|
"learning_rate": 6.531034389645175e-06, |
|
"loss": 0.1255, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 1.3895939086294415, |
|
"grad_norm": 1.563038945198059, |
|
"learning_rate": 6.495849779512879e-06, |
|
"loss": 0.084, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 1.3959390862944163, |
|
"grad_norm": 2.6775851249694824, |
|
"learning_rate": 6.460583626391827e-06, |
|
"loss": 0.0957, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.4022842639593909, |
|
"grad_norm": 5.497508525848389, |
|
"learning_rate": 6.4252378527394475e-06, |
|
"loss": 0.0882, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 1.4086294416243654, |
|
"grad_norm": 2.2709615230560303, |
|
"learning_rate": 6.3898143853535145e-06, |
|
"loss": 0.1038, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 1.4149746192893402, |
|
"grad_norm": 2.0166831016540527, |
|
"learning_rate": 6.354315155267105e-06, |
|
"loss": 0.0778, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 1.4213197969543148, |
|
"grad_norm": 1.4909207820892334, |
|
"learning_rate": 6.318742097643336e-06, |
|
"loss": 0.1091, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 1.4276649746192893, |
|
"grad_norm": 2.3677256107330322, |
|
"learning_rate": 6.283097151669869e-06, |
|
"loss": 0.1019, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.434010152284264, |
|
"grad_norm": 3.072751045227051, |
|
"learning_rate": 6.247382260453203e-06, |
|
"loss": 0.1004, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 1.4403553299492386, |
|
"grad_norm": 2.3845341205596924, |
|
"learning_rate": 6.211599370912752e-06, |
|
"loss": 0.0886, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 1.4467005076142132, |
|
"grad_norm": 4.395678997039795, |
|
"learning_rate": 6.175750433674708e-06, |
|
"loss": 0.1095, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 1.4530456852791878, |
|
"grad_norm": 1.326743721961975, |
|
"learning_rate": 6.139837402965705e-06, |
|
"loss": 0.1021, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 1.4593908629441623, |
|
"grad_norm": 1.4270453453063965, |
|
"learning_rate": 6.103862236506303e-06, |
|
"loss": 0.0744, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.465736040609137, |
|
"grad_norm": 1.5374149084091187, |
|
"learning_rate": 6.067826895404249e-06, |
|
"loss": 0.0757, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 1.4720812182741116, |
|
"grad_norm": 1.5649033784866333, |
|
"learning_rate": 6.031733344047581e-06, |
|
"loss": 0.1023, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 1.4784263959390862, |
|
"grad_norm": 1.169797420501709, |
|
"learning_rate": 5.995583549997542e-06, |
|
"loss": 0.0654, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 1.484771573604061, |
|
"grad_norm": 1.8578475713729858, |
|
"learning_rate": 5.959379483881327e-06, |
|
"loss": 0.0819, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 1.4911167512690355, |
|
"grad_norm": 1.6423859596252441, |
|
"learning_rate": 5.923123119284646e-06, |
|
"loss": 0.0663, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.49746192893401, |
|
"grad_norm": 1.1731383800506592, |
|
"learning_rate": 5.886816432644155e-06, |
|
"loss": 0.0932, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.5038071065989849, |
|
"grad_norm": 1.0412118434906006, |
|
"learning_rate": 5.850461403139702e-06, |
|
"loss": 0.0807, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 1.5101522842639594, |
|
"grad_norm": 1.5270987749099731, |
|
"learning_rate": 5.814060012586443e-06, |
|
"loss": 0.0747, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 1.516497461928934, |
|
"grad_norm": 1.9564098119735718, |
|
"learning_rate": 5.777614245326802e-06, |
|
"loss": 0.0715, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 1.5228426395939088, |
|
"grad_norm": 1.6264362335205078, |
|
"learning_rate": 5.7411260881223045e-06, |
|
"loss": 0.0947, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.529187817258883, |
|
"grad_norm": 1.0679928064346313, |
|
"learning_rate": 5.704597530045272e-06, |
|
"loss": 0.0669, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 1.5355329949238579, |
|
"grad_norm": 1.393947720527649, |
|
"learning_rate": 5.6680305623703926e-06, |
|
"loss": 0.089, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 1.5418781725888326, |
|
"grad_norm": 1.8824158906936646, |
|
"learning_rate": 5.631427178466166e-06, |
|
"loss": 0.071, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 1.548223350253807, |
|
"grad_norm": 1.060774326324463, |
|
"learning_rate": 5.594789373686247e-06, |
|
"loss": 0.0747, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 1.5545685279187818, |
|
"grad_norm": 1.935646891593933, |
|
"learning_rate": 5.5581191452606664e-06, |
|
"loss": 0.0671, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.5609137055837563, |
|
"grad_norm": 1.2591124773025513, |
|
"learning_rate": 5.521418492186962e-06, |
|
"loss": 0.0796, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 1.5672588832487309, |
|
"grad_norm": 2.050698757171631, |
|
"learning_rate": 5.484689415121204e-06, |
|
"loss": 0.0724, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 1.5736040609137056, |
|
"grad_norm": 1.2225536108016968, |
|
"learning_rate": 5.447933916268933e-06, |
|
"loss": 0.0591, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 1.5799492385786802, |
|
"grad_norm": 4.785628318786621, |
|
"learning_rate": 5.411153999276016e-06, |
|
"loss": 0.0873, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 1.5862944162436547, |
|
"grad_norm": 2.2066152095794678, |
|
"learning_rate": 5.374351669119425e-06, |
|
"loss": 0.057, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.5926395939086295, |
|
"grad_norm": 1.9447569847106934, |
|
"learning_rate": 5.337528931997934e-06, |
|
"loss": 0.0548, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 1.598984771573604, |
|
"grad_norm": 2.1758713722229004, |
|
"learning_rate": 5.3006877952227585e-06, |
|
"loss": 0.0674, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 1.6053299492385786, |
|
"grad_norm": 1.5067161321640015, |
|
"learning_rate": 5.263830267108129e-06, |
|
"loss": 0.0583, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 1.6116751269035534, |
|
"grad_norm": 1.6991007328033447, |
|
"learning_rate": 5.226958356861819e-06, |
|
"loss": 0.0521, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 1.618020304568528, |
|
"grad_norm": 1.2602826356887817, |
|
"learning_rate": 5.190074074475606e-06, |
|
"loss": 0.0674, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.6243654822335025, |
|
"grad_norm": 2.1869382858276367, |
|
"learning_rate": 5.153179430615716e-06, |
|
"loss": 0.062, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 1.6307106598984773, |
|
"grad_norm": 1.6224417686462402, |
|
"learning_rate": 5.116276436513201e-06, |
|
"loss": 0.0718, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 1.6370558375634516, |
|
"grad_norm": 2.291430711746216, |
|
"learning_rate": 5.079367103854311e-06, |
|
"loss": 0.0722, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 1.6434010152284264, |
|
"grad_norm": 1.0190826654434204, |
|
"learning_rate": 5.042453444670829e-06, |
|
"loss": 0.0612, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 1.649746192893401, |
|
"grad_norm": 1.6983177661895752, |
|
"learning_rate": 5.005537471230387e-06, |
|
"loss": 0.06, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.6560913705583755, |
|
"grad_norm": 1.5693427324295044, |
|
"learning_rate": 4.968621195926779e-06, |
|
"loss": 0.0674, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 1.6624365482233503, |
|
"grad_norm": 1.4258981943130493, |
|
"learning_rate": 4.931706631170246e-06, |
|
"loss": 0.0602, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 1.6687817258883249, |
|
"grad_norm": 1.9744484424591064, |
|
"learning_rate": 4.894795789277789e-06, |
|
"loss": 0.0657, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 1.6751269035532994, |
|
"grad_norm": 1.0477792024612427, |
|
"learning_rate": 4.857890682363461e-06, |
|
"loss": 0.0643, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 1.6814720812182742, |
|
"grad_norm": 1.2517801523208618, |
|
"learning_rate": 4.820993322228691e-06, |
|
"loss": 0.0574, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.6878172588832487, |
|
"grad_norm": 1.339064359664917, |
|
"learning_rate": 4.784105720252602e-06, |
|
"loss": 0.0639, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 1.6941624365482233, |
|
"grad_norm": 1.0788367986679077, |
|
"learning_rate": 4.747229887282379e-06, |
|
"loss": 0.044, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 1.700507614213198, |
|
"grad_norm": 0.8012908697128296, |
|
"learning_rate": 4.7103678335236395e-06, |
|
"loss": 0.0642, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 1.7068527918781726, |
|
"grad_norm": 1.975696086883545, |
|
"learning_rate": 4.673521568430859e-06, |
|
"loss": 0.0655, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 1.7131979695431472, |
|
"grad_norm": 1.7474173307418823, |
|
"learning_rate": 4.63669310059783e-06, |
|
"loss": 0.0447, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.719543147208122, |
|
"grad_norm": 0.9429912567138672, |
|
"learning_rate": 4.5998844376481665e-06, |
|
"loss": 0.0588, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 1.7258883248730963, |
|
"grad_norm": 2.345489025115967, |
|
"learning_rate": 4.5630975861258605e-06, |
|
"loss": 0.0637, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 1.732233502538071, |
|
"grad_norm": 0.8988242149353027, |
|
"learning_rate": 4.526334551385902e-06, |
|
"loss": 0.0613, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 1.7385786802030458, |
|
"grad_norm": 2.0134191513061523, |
|
"learning_rate": 4.489597337484961e-06, |
|
"loss": 0.0533, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 1.7449238578680202, |
|
"grad_norm": 1.8432866334915161, |
|
"learning_rate": 4.452887947072142e-06, |
|
"loss": 0.0684, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.751269035532995, |
|
"grad_norm": 3.151284694671631, |
|
"learning_rate": 4.416208381279812e-06, |
|
"loss": 0.0556, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 1.7576142131979695, |
|
"grad_norm": 1.051060676574707, |
|
"learning_rate": 4.379560639614513e-06, |
|
"loss": 0.0498, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 1.763959390862944, |
|
"grad_norm": 1.5683525800704956, |
|
"learning_rate": 4.3429467198479665e-06, |
|
"loss": 0.0524, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 1.7703045685279188, |
|
"grad_norm": 1.0461344718933105, |
|
"learning_rate": 4.306368617908163e-06, |
|
"loss": 0.0445, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 1.7766497461928934, |
|
"grad_norm": 1.2296735048294067, |
|
"learning_rate": 4.2698283277705655e-06, |
|
"loss": 0.0464, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.782994923857868, |
|
"grad_norm": 0.9869544506072998, |
|
"learning_rate": 4.23332784134941e-06, |
|
"loss": 0.0506, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 1.7893401015228427, |
|
"grad_norm": 2.624345541000366, |
|
"learning_rate": 4.196869148389114e-06, |
|
"loss": 0.0455, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 1.7956852791878173, |
|
"grad_norm": 2.0790648460388184, |
|
"learning_rate": 4.160454236355822e-06, |
|
"loss": 0.0465, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 1.8020304568527918, |
|
"grad_norm": 1.0878472328186035, |
|
"learning_rate": 4.124085090329056e-06, |
|
"loss": 0.0354, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 1.8083756345177666, |
|
"grad_norm": 1.4148125648498535, |
|
"learning_rate": 4.087763692893498e-06, |
|
"loss": 0.0378, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.8147208121827412, |
|
"grad_norm": 0.8988755941390991, |
|
"learning_rate": 4.051492024030925e-06, |
|
"loss": 0.0421, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 1.8210659898477157, |
|
"grad_norm": 2.1405270099639893, |
|
"learning_rate": 4.015272061012271e-06, |
|
"loss": 0.0647, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 1.8274111675126905, |
|
"grad_norm": 0.8886227607727051, |
|
"learning_rate": 3.979105778289832e-06, |
|
"loss": 0.0547, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 1.8337563451776648, |
|
"grad_norm": 1.402446985244751, |
|
"learning_rate": 3.942995147389648e-06, |
|
"loss": 0.0378, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 1.8401015228426396, |
|
"grad_norm": 1.283605933189392, |
|
"learning_rate": 3.9069421368040115e-06, |
|
"loss": 0.0488, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.8464467005076142, |
|
"grad_norm": 1.226680874824524, |
|
"learning_rate": 3.870948711884178e-06, |
|
"loss": 0.0382, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 1.8527918781725887, |
|
"grad_norm": 1.871385097503662, |
|
"learning_rate": 3.835016834733216e-06, |
|
"loss": 0.0441, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 1.8591370558375635, |
|
"grad_norm": 1.125570297241211, |
|
"learning_rate": 3.7991484640990506e-06, |
|
"loss": 0.0429, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 1.865482233502538, |
|
"grad_norm": 1.131261944770813, |
|
"learning_rate": 3.763345555267692e-06, |
|
"loss": 0.0404, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 1.8718274111675126, |
|
"grad_norm": 1.5131438970565796, |
|
"learning_rate": 3.727610059956641e-06, |
|
"loss": 0.0359, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 1.8781725888324874, |
|
"grad_norm": 0.8379979133605957, |
|
"learning_rate": 3.691943926208494e-06, |
|
"loss": 0.0508, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 1.884517766497462, |
|
"grad_norm": 1.1895625591278076, |
|
"learning_rate": 3.6563490982847577e-06, |
|
"loss": 0.034, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 1.8908629441624365, |
|
"grad_norm": 0.7952091097831726, |
|
"learning_rate": 3.620827516559854e-06, |
|
"loss": 0.0494, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 1.8972081218274113, |
|
"grad_norm": 1.2926766872406006, |
|
"learning_rate": 3.58538111741535e-06, |
|
"loss": 0.0483, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 1.9035532994923858, |
|
"grad_norm": 1.165218472480774, |
|
"learning_rate": 3.550011833134399e-06, |
|
"loss": 0.0446, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.9098984771573604, |
|
"grad_norm": 1.2693628072738647, |
|
"learning_rate": 3.5147215917964037e-06, |
|
"loss": 0.0296, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 1.9162436548223352, |
|
"grad_norm": 0.7264485955238342, |
|
"learning_rate": 3.4795123171719142e-06, |
|
"loss": 0.0488, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 1.9225888324873095, |
|
"grad_norm": 0.9121705889701843, |
|
"learning_rate": 3.4443859286177545e-06, |
|
"loss": 0.0299, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 1.9289340101522843, |
|
"grad_norm": 1.2310829162597656, |
|
"learning_rate": 3.4093443409723985e-06, |
|
"loss": 0.0389, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 1.9352791878172588, |
|
"grad_norm": 1.087215542793274, |
|
"learning_rate": 3.374389464451583e-06, |
|
"loss": 0.0367, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 1.9416243654822334, |
|
"grad_norm": 1.1739871501922607, |
|
"learning_rate": 3.339523204544176e-06, |
|
"loss": 0.0407, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 1.9479695431472082, |
|
"grad_norm": 0.9143801927566528, |
|
"learning_rate": 3.3047474619083043e-06, |
|
"loss": 0.0361, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 1.9543147208121827, |
|
"grad_norm": 0.9468094706535339, |
|
"learning_rate": 3.2700641322677405e-06, |
|
"loss": 0.0309, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 1.9606598984771573, |
|
"grad_norm": 1.2729860544204712, |
|
"learning_rate": 3.235475106308569e-06, |
|
"loss": 0.0194, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 1.967005076142132, |
|
"grad_norm": 1.381415843963623, |
|
"learning_rate": 3.200982269576111e-06, |
|
"loss": 0.0495, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.9733502538071066, |
|
"grad_norm": 1.4151417016983032, |
|
"learning_rate": 3.1665875023721453e-06, |
|
"loss": 0.0344, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 1.9796954314720812, |
|
"grad_norm": 0.9717885851860046, |
|
"learning_rate": 3.1322926796524016e-06, |
|
"loss": 0.0376, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 1.986040609137056, |
|
"grad_norm": 0.9146430492401123, |
|
"learning_rate": 3.0980996709243517e-06, |
|
"loss": 0.028, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 1.9923857868020305, |
|
"grad_norm": 1.5948601961135864, |
|
"learning_rate": 3.0640103401453035e-06, |
|
"loss": 0.0511, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 1.998730964467005, |
|
"grad_norm": 1.120682716369629, |
|
"learning_rate": 3.030026545620787e-06, |
|
"loss": 0.0411, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 2.00507614213198, |
|
"grad_norm": 0.8402583003044128, |
|
"learning_rate": 2.9961501399032546e-06, |
|
"loss": 0.0272, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 2.011421319796954, |
|
"grad_norm": 1.102598786354065, |
|
"learning_rate": 2.9623829696910867e-06, |
|
"loss": 0.0207, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 2.017766497461929, |
|
"grad_norm": 0.9598972201347351, |
|
"learning_rate": 2.928726875727937e-06, |
|
"loss": 0.0197, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 2.0241116751269037, |
|
"grad_norm": 0.8507049679756165, |
|
"learning_rate": 2.8951836927023703e-06, |
|
"loss": 0.0161, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 2.030456852791878, |
|
"grad_norm": 0.9228895902633667, |
|
"learning_rate": 2.861755249147862e-06, |
|
"loss": 0.023, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 2.036802030456853, |
|
"grad_norm": 0.8271005749702454, |
|
"learning_rate": 2.828443367343119e-06, |
|
"loss": 0.0148, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 2.0431472081218276, |
|
"grad_norm": 1.2311136722564697, |
|
"learning_rate": 2.7952498632127324e-06, |
|
"loss": 0.0202, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 2.049492385786802, |
|
"grad_norm": 1.3220641613006592, |
|
"learning_rate": 2.762176546228198e-06, |
|
"loss": 0.0235, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 2.0558375634517767, |
|
"grad_norm": 1.2385421991348267, |
|
"learning_rate": 2.7292252193092693e-06, |
|
"loss": 0.0205, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 2.0621827411167515, |
|
"grad_norm": 1.238295316696167, |
|
"learning_rate": 2.6963976787256726e-06, |
|
"loss": 0.0157, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 2.068527918781726, |
|
"grad_norm": 0.7305588126182556, |
|
"learning_rate": 2.6636957139992003e-06, |
|
"loss": 0.0183, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 2.0748730964467006, |
|
"grad_norm": 0.8512719869613647, |
|
"learning_rate": 2.631121107806144e-06, |
|
"loss": 0.0204, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 2.081218274111675, |
|
"grad_norm": 0.8006191849708557, |
|
"learning_rate": 2.598675635880129e-06, |
|
"loss": 0.0223, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 2.0875634517766497, |
|
"grad_norm": 1.4886091947555542, |
|
"learning_rate": 2.5663610669153043e-06, |
|
"loss": 0.0197, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 2.0939086294416245, |
|
"grad_norm": 0.7531688213348389, |
|
"learning_rate": 2.534179162469924e-06, |
|
"loss": 0.0222, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 2.100253807106599, |
|
"grad_norm": 0.6706914305686951, |
|
"learning_rate": 2.502131676870335e-06, |
|
"loss": 0.019, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 2.1065989847715736, |
|
"grad_norm": 0.8195891380310059, |
|
"learning_rate": 2.470220357115327e-06, |
|
"loss": 0.0099, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 2.1129441624365484, |
|
"grad_norm": 0.8743392825126648, |
|
"learning_rate": 2.438446942780911e-06, |
|
"loss": 0.0145, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 2.1192893401015227, |
|
"grad_norm": 0.5079776048660278, |
|
"learning_rate": 2.4068131659254803e-06, |
|
"loss": 0.0164, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 2.1256345177664975, |
|
"grad_norm": 0.512514054775238, |
|
"learning_rate": 2.3753207509953963e-06, |
|
"loss": 0.0287, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 2.1319796954314723, |
|
"grad_norm": 0.7019079923629761, |
|
"learning_rate": 2.3439714147309845e-06, |
|
"loss": 0.0189, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 2.1383248730964466, |
|
"grad_norm": 0.8089588284492493, |
|
"learning_rate": 2.312766866072947e-06, |
|
"loss": 0.0255, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 2.1446700507614214, |
|
"grad_norm": 0.9173935651779175, |
|
"learning_rate": 2.2817088060692094e-06, |
|
"loss": 0.0149, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 2.151015228426396, |
|
"grad_norm": 1.1662015914916992, |
|
"learning_rate": 2.2507989277821847e-06, |
|
"loss": 0.0201, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 2.1573604060913705, |
|
"grad_norm": 0.5388917922973633, |
|
"learning_rate": 2.2200389161964795e-06, |
|
"loss": 0.0198, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 2.1637055837563453, |
|
"grad_norm": 1.1195067167282104, |
|
"learning_rate": 2.189430448127055e-06, |
|
"loss": 0.0196, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 2.1700507614213196, |
|
"grad_norm": 0.7136582732200623, |
|
"learning_rate": 2.1589751921277925e-06, |
|
"loss": 0.0188, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 2.1763959390862944, |
|
"grad_norm": 0.773573100566864, |
|
"learning_rate": 2.128674808400565e-06, |
|
"loss": 0.0212, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 2.182741116751269, |
|
"grad_norm": 0.7614580392837524, |
|
"learning_rate": 2.098530948704714e-06, |
|
"loss": 0.021, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 2.1890862944162435, |
|
"grad_norm": 0.6622429490089417, |
|
"learning_rate": 2.068545256267015e-06, |
|
"loss": 0.0169, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 2.1954314720812182, |
|
"grad_norm": 0.3882254660129547, |
|
"learning_rate": 2.0387193656921063e-06, |
|
"loss": 0.023, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 2.201776649746193, |
|
"grad_norm": 1.2883610725402832, |
|
"learning_rate": 2.0090549028733685e-06, |
|
"loss": 0.0179, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 2.2081218274111674, |
|
"grad_norm": 1.0185002088546753, |
|
"learning_rate": 1.9795534849043054e-06, |
|
"loss": 0.0206, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 2.214467005076142, |
|
"grad_norm": 0.7340651154518127, |
|
"learning_rate": 1.950216719990383e-06, |
|
"loss": 0.0159, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 2.220812182741117, |
|
"grad_norm": 0.8917669057846069, |
|
"learning_rate": 1.921046207361365e-06, |
|
"loss": 0.014, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.2271573604060912, |
|
"grad_norm": 0.8342999815940857, |
|
"learning_rate": 1.8920435371841394e-06, |
|
"loss": 0.0168, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 2.233502538071066, |
|
"grad_norm": 0.49451372027397156, |
|
"learning_rate": 1.8632102904760241e-06, |
|
"loss": 0.0202, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 2.239847715736041, |
|
"grad_norm": 0.8475871086120605, |
|
"learning_rate": 1.8345480390185865e-06, |
|
"loss": 0.0228, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 2.246192893401015, |
|
"grad_norm": 0.6851008534431458, |
|
"learning_rate": 1.806058345271962e-06, |
|
"loss": 0.016, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 2.25253807106599, |
|
"grad_norm": 1.2128303050994873, |
|
"learning_rate": 1.7777427622896764e-06, |
|
"loss": 0.0183, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 2.2588832487309647, |
|
"grad_norm": 0.3974970877170563, |
|
"learning_rate": 1.749602833633992e-06, |
|
"loss": 0.0221, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 2.265228426395939, |
|
"grad_norm": 0.6373499631881714, |
|
"learning_rate": 1.7216400932917544e-06, |
|
"loss": 0.0184, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 2.271573604060914, |
|
"grad_norm": 0.6473302245140076, |
|
"learning_rate": 1.6938560655907743e-06, |
|
"loss": 0.0156, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 2.277918781725888, |
|
"grad_norm": 0.5753197073936462, |
|
"learning_rate": 1.6662522651167345e-06, |
|
"loss": 0.0137, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 2.284263959390863, |
|
"grad_norm": 0.9094467759132385, |
|
"learning_rate": 1.6388301966306215e-06, |
|
"loss": 0.0147, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.2906091370558377, |
|
"grad_norm": 0.5902413725852966, |
|
"learning_rate": 1.6115913549867025e-06, |
|
"loss": 0.0224, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 2.296954314720812, |
|
"grad_norm": 0.875133752822876, |
|
"learning_rate": 1.5845372250510287e-06, |
|
"loss": 0.0232, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 2.303299492385787, |
|
"grad_norm": 1.241910696029663, |
|
"learning_rate": 1.557669281620497e-06, |
|
"loss": 0.0099, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 2.3096446700507616, |
|
"grad_norm": 0.6328564882278442, |
|
"learning_rate": 1.5309889893424563e-06, |
|
"loss": 0.0132, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 2.315989847715736, |
|
"grad_norm": 0.5470057725906372, |
|
"learning_rate": 1.5044978026348527e-06, |
|
"loss": 0.0164, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 2.3223350253807107, |
|
"grad_norm": 1.0264612436294556, |
|
"learning_rate": 1.4781971656069665e-06, |
|
"loss": 0.0203, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 2.3286802030456855, |
|
"grad_norm": 0.6052107810974121, |
|
"learning_rate": 1.4520885119806704e-06, |
|
"loss": 0.026, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 2.33502538071066, |
|
"grad_norm": 0.4180527329444885, |
|
"learning_rate": 1.4261732650122795e-06, |
|
"loss": 0.0204, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 2.3413705583756346, |
|
"grad_norm": 0.6096001267433167, |
|
"learning_rate": 1.4004528374149745e-06, |
|
"loss": 0.0095, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 2.347715736040609, |
|
"grad_norm": 0.5584781765937805, |
|
"learning_rate": 1.3749286312817722e-06, |
|
"loss": 0.0126, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 2.3540609137055837, |
|
"grad_norm": 0.3657080829143524, |
|
"learning_rate": 1.349602038009114e-06, |
|
"loss": 0.0108, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 2.3604060913705585, |
|
"grad_norm": 0.9728971719741821, |
|
"learning_rate": 1.3244744382210017e-06, |
|
"loss": 0.0104, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 2.3667512690355332, |
|
"grad_norm": 0.8524286150932312, |
|
"learning_rate": 1.2995472016937405e-06, |
|
"loss": 0.0167, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 2.3730964467005076, |
|
"grad_norm": 0.6725841164588928, |
|
"learning_rate": 1.2748216872812747e-06, |
|
"loss": 0.0131, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 2.3794416243654823, |
|
"grad_norm": 0.8610649704933167, |
|
"learning_rate": 1.2502992428411022e-06, |
|
"loss": 0.018, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 2.3857868020304567, |
|
"grad_norm": 0.4205199182033539, |
|
"learning_rate": 1.2259812051608066e-06, |
|
"loss": 0.0158, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 2.3921319796954315, |
|
"grad_norm": 0.7805858850479126, |
|
"learning_rate": 1.2018688998851802e-06, |
|
"loss": 0.0203, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 2.3984771573604062, |
|
"grad_norm": 0.2444067746400833, |
|
"learning_rate": 1.1779636414439672e-06, |
|
"loss": 0.0147, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 2.4048223350253806, |
|
"grad_norm": 0.40047794580459595, |
|
"learning_rate": 1.1542667329801998e-06, |
|
"loss": 0.011, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 2.4111675126903553, |
|
"grad_norm": 0.7459643483161926, |
|
"learning_rate": 1.130779466279166e-06, |
|
"loss": 0.0126, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 2.41751269035533, |
|
"grad_norm": 0.6922224760055542, |
|
"learning_rate": 1.107503121697997e-06, |
|
"loss": 0.0163, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 2.4238578680203045, |
|
"grad_norm": 1.863350749015808, |
|
"learning_rate": 1.0844389680958533e-06, |
|
"loss": 0.0194, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 2.4302030456852792, |
|
"grad_norm": 0.29856589436531067, |
|
"learning_rate": 1.0615882627647766e-06, |
|
"loss": 0.0155, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 2.436548223350254, |
|
"grad_norm": 0.377093642950058, |
|
"learning_rate": 1.0389522513611372e-06, |
|
"loss": 0.015, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 2.4428934010152283, |
|
"grad_norm": 0.5333195924758911, |
|
"learning_rate": 1.0165321678377332e-06, |
|
"loss": 0.0137, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 2.449238578680203, |
|
"grad_norm": 0.32329970598220825, |
|
"learning_rate": 9.943292343765293e-07, |
|
"loss": 0.0084, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 2.4555837563451774, |
|
"grad_norm": 0.3231019377708435, |
|
"learning_rate": 9.723446613220249e-07, |
|
"loss": 0.0126, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 2.4619289340101522, |
|
"grad_norm": 0.6870127320289612, |
|
"learning_rate": 9.505796471152783e-07, |
|
"loss": 0.0137, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 2.468274111675127, |
|
"grad_norm": 0.6023297309875488, |
|
"learning_rate": 9.290353782285766e-07, |
|
"loss": 0.0148, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 2.4746192893401013, |
|
"grad_norm": 0.46455860137939453, |
|
"learning_rate": 9.077130291007553e-07, |
|
"loss": 0.022, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 2.480964467005076, |
|
"grad_norm": 0.5320664048194885, |
|
"learning_rate": 8.86613762073183e-07, |
|
"loss": 0.0096, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 2.487309644670051, |
|
"grad_norm": 0.6012682914733887, |
|
"learning_rate": 8.657387273263895e-07, |
|
"loss": 0.0099, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 2.4936548223350252, |
|
"grad_norm": 0.8949501514434814, |
|
"learning_rate": 8.450890628173725e-07, |
|
"loss": 0.0111, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 0.8802683353424072, |
|
"learning_rate": 8.246658942175611e-07, |
|
"loss": 0.0143, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 2.5063451776649748, |
|
"grad_norm": 0.9922573566436768, |
|
"learning_rate": 8.04470334851456e-07, |
|
"loss": 0.0234, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 2.512690355329949, |
|
"grad_norm": 0.23940332233905792, |
|
"learning_rate": 7.845034856359368e-07, |
|
"loss": 0.011, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 2.519035532994924, |
|
"grad_norm": 0.2019755095243454, |
|
"learning_rate": 7.647664350202461e-07, |
|
"loss": 0.0135, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 2.525380710659898, |
|
"grad_norm": 0.17184686660766602, |
|
"learning_rate": 7.452602589266583e-07, |
|
"loss": 0.0074, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 2.531725888324873, |
|
"grad_norm": 0.8647210597991943, |
|
"learning_rate": 7.259860206918268e-07, |
|
"loss": 0.0101, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 2.5380710659898478, |
|
"grad_norm": 0.9781297445297241, |
|
"learning_rate": 7.069447710088167e-07, |
|
"loss": 0.0147, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.5444162436548226, |
|
"grad_norm": 0.7230397462844849, |
|
"learning_rate": 6.881375478698332e-07, |
|
"loss": 0.0159, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 2.550761421319797, |
|
"grad_norm": 1.1674317121505737, |
|
"learning_rate": 6.695653765096327e-07, |
|
"loss": 0.0125, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 2.5571065989847717, |
|
"grad_norm": 0.38593119382858276, |
|
"learning_rate": 6.512292693496353e-07, |
|
"loss": 0.0071, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 2.563451776649746, |
|
"grad_norm": 0.3000188171863556, |
|
"learning_rate": 6.331302259427418e-07, |
|
"loss": 0.0086, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 2.5697969543147208, |
|
"grad_norm": 0.6724553108215332, |
|
"learning_rate": 6.152692329188297e-07, |
|
"loss": 0.0076, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 2.5761421319796955, |
|
"grad_norm": 1.0246587991714478, |
|
"learning_rate": 5.976472639309888e-07, |
|
"loss": 0.02, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 2.5824873096446703, |
|
"grad_norm": 0.5962472558021545, |
|
"learning_rate": 5.802652796024294e-07, |
|
"loss": 0.0208, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 2.5888324873096447, |
|
"grad_norm": 0.44684454798698425, |
|
"learning_rate": 5.631242274741211e-07, |
|
"loss": 0.0179, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 2.5951776649746194, |
|
"grad_norm": 0.446123331785202, |
|
"learning_rate": 5.46225041953145e-07, |
|
"loss": 0.0065, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 2.6015228426395938, |
|
"grad_norm": 0.28516885638237, |
|
"learning_rate": 5.295686442617442e-07, |
|
"loss": 0.0084, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 2.6078680203045685, |
|
"grad_norm": 0.42138996720314026, |
|
"learning_rate": 5.131559423871191e-07, |
|
"loss": 0.0119, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 2.6142131979695433, |
|
"grad_norm": 0.857070803642273, |
|
"learning_rate": 4.969878310319204e-07, |
|
"loss": 0.0116, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 2.6205583756345177, |
|
"grad_norm": 0.4262557327747345, |
|
"learning_rate": 4.810651915654807e-07, |
|
"loss": 0.013, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 2.6269035532994924, |
|
"grad_norm": 0.08034439384937286, |
|
"learning_rate": 4.6538889197576985e-07, |
|
"loss": 0.0085, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 2.6332487309644668, |
|
"grad_norm": 0.4999110698699951, |
|
"learning_rate": 4.4995978682207396e-07, |
|
"loss": 0.0104, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 2.6395939086294415, |
|
"grad_norm": 0.47301802039146423, |
|
"learning_rate": 4.347787171884149e-07, |
|
"loss": 0.013, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 2.6459390862944163, |
|
"grad_norm": 0.2837192416191101, |
|
"learning_rate": 4.1984651063769864e-07, |
|
"loss": 0.0123, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 2.652284263959391, |
|
"grad_norm": 0.4908500611782074, |
|
"learning_rate": 4.0516398116660196e-07, |
|
"loss": 0.0137, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 2.6586294416243654, |
|
"grad_norm": 0.38162919878959656, |
|
"learning_rate": 3.907319291612027e-07, |
|
"loss": 0.0108, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 2.66497461928934, |
|
"grad_norm": 0.9448516368865967, |
|
"learning_rate": 3.765511413533429e-07, |
|
"loss": 0.0139, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 2.6713197969543145, |
|
"grad_norm": 0.4047912359237671, |
|
"learning_rate": 3.626223907777482e-07, |
|
"loss": 0.0147, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 2.6776649746192893, |
|
"grad_norm": 0.1890551596879959, |
|
"learning_rate": 3.489464367298795e-07, |
|
"loss": 0.0135, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 2.684010152284264, |
|
"grad_norm": 0.3367404341697693, |
|
"learning_rate": 3.3552402472454893e-07, |
|
"loss": 0.017, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 2.6903553299492384, |
|
"grad_norm": 0.5344458818435669, |
|
"learning_rate": 3.2235588645527893e-07, |
|
"loss": 0.0201, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 2.696700507614213, |
|
"grad_norm": 0.8313795328140259, |
|
"learning_rate": 3.094427397544103e-07, |
|
"loss": 0.0162, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 2.703045685279188, |
|
"grad_norm": 0.35280096530914307, |
|
"learning_rate": 2.967852885539768e-07, |
|
"loss": 0.0064, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 2.7093908629441623, |
|
"grad_norm": 0.6538042426109314, |
|
"learning_rate": 2.843842228473293e-07, |
|
"loss": 0.0145, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 2.715736040609137, |
|
"grad_norm": 0.6905611753463745, |
|
"learning_rate": 2.7224021865151996e-07, |
|
"loss": 0.0128, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 2.722081218274112, |
|
"grad_norm": 0.5076076984405518, |
|
"learning_rate": 2.603539379704567e-07, |
|
"loss": 0.0171, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 2.728426395939086, |
|
"grad_norm": 0.6590428352355957, |
|
"learning_rate": 2.4872602875881004e-07, |
|
"loss": 0.0077, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 2.734771573604061, |
|
"grad_norm": 0.3470360338687897, |
|
"learning_rate": 2.373571248866946e-07, |
|
"loss": 0.0115, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 2.7411167512690353, |
|
"grad_norm": 0.5780541896820068, |
|
"learning_rate": 2.262478461051132e-07, |
|
"loss": 0.0191, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 2.74746192893401, |
|
"grad_norm": 1.4629708528518677, |
|
"learning_rate": 2.153987980121719e-07, |
|
"loss": 0.0189, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 2.753807106598985, |
|
"grad_norm": 1.3563203811645508, |
|
"learning_rate": 2.0481057202006992e-07, |
|
"loss": 0.0116, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 2.7601522842639596, |
|
"grad_norm": 0.4442911744117737, |
|
"learning_rate": 1.9448374532285707e-07, |
|
"loss": 0.0153, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 2.766497461928934, |
|
"grad_norm": 0.26719120144844055, |
|
"learning_rate": 1.8441888086497162e-07, |
|
"loss": 0.0156, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 2.7728426395939088, |
|
"grad_norm": 0.4203988015651703, |
|
"learning_rate": 1.7461652731055157e-07, |
|
"loss": 0.0162, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 2.779187817258883, |
|
"grad_norm": 1.0901730060577393, |
|
"learning_rate": 1.650772190135247e-07, |
|
"loss": 0.0131, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 2.785532994923858, |
|
"grad_norm": 0.3400239944458008, |
|
"learning_rate": 1.5580147598848018e-07, |
|
"loss": 0.0141, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 2.7918781725888326, |
|
"grad_norm": 0.38450250029563904, |
|
"learning_rate": 1.4678980388232233e-07, |
|
"loss": 0.0099, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 2.798223350253807, |
|
"grad_norm": 0.4401623606681824, |
|
"learning_rate": 1.3804269394670388e-07, |
|
"loss": 0.0166, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 2.8045685279187818, |
|
"grad_norm": 0.765143871307373, |
|
"learning_rate": 1.295606230112495e-07, |
|
"loss": 0.015, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 2.810913705583756, |
|
"grad_norm": 0.47553789615631104, |
|
"learning_rate": 1.2134405345755773e-07, |
|
"loss": 0.0104, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 2.817258883248731, |
|
"grad_norm": 1.053678274154663, |
|
"learning_rate": 1.1339343319400175e-07, |
|
"loss": 0.0085, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 2.8236040609137056, |
|
"grad_norm": 0.5694789290428162, |
|
"learning_rate": 1.057091956313061e-07, |
|
"loss": 0.0131, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 2.8299492385786804, |
|
"grad_norm": 0.41042569279670715, |
|
"learning_rate": 9.829175965892557e-08, |
|
"loss": 0.0162, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 2.8362944162436547, |
|
"grad_norm": 0.30753186345100403, |
|
"learning_rate": 9.114152962220734e-08, |
|
"loss": 0.0085, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 2.8426395939086295, |
|
"grad_norm": 1.1423698663711548, |
|
"learning_rate": 8.425889530034815e-08, |
|
"loss": 0.0111, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 2.848984771573604, |
|
"grad_norm": 0.9772459864616394, |
|
"learning_rate": 7.764423188515058e-08, |
|
"loss": 0.0137, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 2.8553299492385786, |
|
"grad_norm": 0.2530859112739563, |
|
"learning_rate": 7.129789996056568e-08, |
|
"loss": 0.0148, |
|
"step": 4500 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 4728, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 391740982493184.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|