|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9889162927525035, |
|
"eval_steps": 500, |
|
"global_step": 5200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0019017621014471222, |
|
"grad_norm": 5.274278163909912, |
|
"learning_rate": 4.4999999999999996e-05, |
|
"loss": 1.3065, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0038035242028942443, |
|
"grad_norm": 6.159559726715088, |
|
"learning_rate": 9.5e-05, |
|
"loss": 0.5845, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.005705286304341367, |
|
"grad_norm": 0.032951805740594864, |
|
"learning_rate": 0.000145, |
|
"loss": 0.0535, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.007607048405788489, |
|
"grad_norm": 0.0016232666093856096, |
|
"learning_rate": 0.00019500000000000002, |
|
"loss": 0.0011, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.00950881050723561, |
|
"grad_norm": 0.03492136672139168, |
|
"learning_rate": 0.000245, |
|
"loss": 0.0004, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.011410572608682733, |
|
"grad_norm": 0.03490574657917023, |
|
"learning_rate": 0.000295, |
|
"loss": 0.0002, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.013312334710129855, |
|
"grad_norm": 0.2781233489513397, |
|
"learning_rate": 0.000345, |
|
"loss": 0.0028, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.015214096811576977, |
|
"grad_norm": 0.2495400309562683, |
|
"learning_rate": 0.000395, |
|
"loss": 0.0054, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.0171158589130241, |
|
"grad_norm": 0.016650637611746788, |
|
"learning_rate": 0.00044500000000000003, |
|
"loss": 0.0113, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.01901762101447122, |
|
"grad_norm": 0.006048521026968956, |
|
"learning_rate": 0.000495, |
|
"loss": 0.0129, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.020919383115918344, |
|
"grad_norm": 0.04650586470961571, |
|
"learning_rate": 0.0004999995932430571, |
|
"loss": 0.0531, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.022821145217365467, |
|
"grad_norm": 0.2665582597255707, |
|
"learning_rate": 0.0004999981871713734, |
|
"loss": 0.0106, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.024722907318812586, |
|
"grad_norm": 0.3457753360271454, |
|
"learning_rate": 0.0004999957767689057, |
|
"loss": 0.0111, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.02662466942025971, |
|
"grad_norm": 0.3711691200733185, |
|
"learning_rate": 0.0004999923620453374, |
|
"loss": 0.0089, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.028526431521706832, |
|
"grad_norm": 0.5576126575469971, |
|
"learning_rate": 0.0004999879430143867, |
|
"loss": 0.0225, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.030428193623153955, |
|
"grad_norm": 0.061603959649801254, |
|
"learning_rate": 0.0004999825196938062, |
|
"loss": 0.0066, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.032329955724601074, |
|
"grad_norm": 0.443553626537323, |
|
"learning_rate": 0.0004999760921053835, |
|
"loss": 0.0123, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.0342317178260482, |
|
"grad_norm": 0.24774852395057678, |
|
"learning_rate": 0.0004999686602749405, |
|
"loss": 0.0153, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.03613347992749532, |
|
"grad_norm": 0.07191673666238785, |
|
"learning_rate": 0.0004999602242323333, |
|
"loss": 0.0126, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.03803524202894244, |
|
"grad_norm": 0.48675355315208435, |
|
"learning_rate": 0.0004999507840114525, |
|
"loss": 0.0232, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.039937004130389565, |
|
"grad_norm": 0.37680506706237793, |
|
"learning_rate": 0.000499940339650223, |
|
"loss": 0.0113, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.04183876623183669, |
|
"grad_norm": 0.4189215302467346, |
|
"learning_rate": 0.0004999288911906033, |
|
"loss": 0.0111, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.04374052833328381, |
|
"grad_norm": 0.5190231204032898, |
|
"learning_rate": 0.0004999164386785859, |
|
"loss": 0.0181, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.045642290434730934, |
|
"grad_norm": 0.3827992379665375, |
|
"learning_rate": 0.0004999029821641969, |
|
"loss": 0.0254, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.04754405253617805, |
|
"grad_norm": 0.3790438771247864, |
|
"learning_rate": 0.0004998885217014959, |
|
"loss": 0.0135, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.04944581463762517, |
|
"grad_norm": 0.32959797978401184, |
|
"learning_rate": 0.0004998730573485757, |
|
"loss": 0.02, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.051347576739072295, |
|
"grad_norm": 0.11981680989265442, |
|
"learning_rate": 0.0004998565891675621, |
|
"loss": 0.0274, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.05324933884051942, |
|
"grad_norm": 0.417461097240448, |
|
"learning_rate": 0.0004998391172246136, |
|
"loss": 0.0226, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.05515110094196654, |
|
"grad_norm": 0.5383813381195068, |
|
"learning_rate": 0.0004998206415899208, |
|
"loss": 0.0198, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.057052863043413664, |
|
"grad_norm": 0.49503782391548157, |
|
"learning_rate": 0.0004998011623377073, |
|
"loss": 0.0217, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.05895462514486079, |
|
"grad_norm": 0.5187065005302429, |
|
"learning_rate": 0.0004997806795462279, |
|
"loss": 0.0251, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.06085638724630791, |
|
"grad_norm": 0.27587956190109253, |
|
"learning_rate": 0.0004997591932977692, |
|
"loss": 0.0212, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.06275814934775503, |
|
"grad_norm": 0.19010399281978607, |
|
"learning_rate": 0.000499736703678649, |
|
"loss": 0.0186, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.06465991144920215, |
|
"grad_norm": 0.28244227170944214, |
|
"learning_rate": 0.0004997132107792161, |
|
"loss": 0.0281, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.06656167355064928, |
|
"grad_norm": 0.3918805420398712, |
|
"learning_rate": 0.0004996887146938497, |
|
"loss": 0.0349, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.0684634356520964, |
|
"grad_norm": 0.3364613354206085, |
|
"learning_rate": 0.0004996632155209592, |
|
"loss": 0.0192, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.07036519775354352, |
|
"grad_norm": 0.20451515913009644, |
|
"learning_rate": 0.0004996367133629837, |
|
"loss": 0.0341, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.07226695985499064, |
|
"grad_norm": 0.5754940509796143, |
|
"learning_rate": 0.0004996092083263919, |
|
"loss": 0.0327, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.07416872195643776, |
|
"grad_norm": 0.5575674176216125, |
|
"learning_rate": 0.000499580700521681, |
|
"loss": 0.0407, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.07607048405788489, |
|
"grad_norm": 0.4820100665092468, |
|
"learning_rate": 0.0004995511900633771, |
|
"loss": 0.0441, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.077972246159332, |
|
"grad_norm": 0.49140122532844543, |
|
"learning_rate": 0.000499520677070034, |
|
"loss": 0.036, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.07987400826077913, |
|
"grad_norm": 0.3462704122066498, |
|
"learning_rate": 0.0004994891616642331, |
|
"loss": 0.0325, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.08177577036222625, |
|
"grad_norm": 0.3936806917190552, |
|
"learning_rate": 0.000499456643972583, |
|
"loss": 0.0406, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.08367753246367338, |
|
"grad_norm": 0.3352107107639313, |
|
"learning_rate": 0.0004994231241257185, |
|
"loss": 0.0488, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.08557929456512049, |
|
"grad_norm": 0.4352121353149414, |
|
"learning_rate": 0.0004993886022583009, |
|
"loss": 0.0379, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.08748105666656762, |
|
"grad_norm": 0.4921802282333374, |
|
"learning_rate": 0.0004993530785090166, |
|
"loss": 0.1226, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.08938281876801474, |
|
"grad_norm": 0.5758520364761353, |
|
"learning_rate": 0.000499316553020577, |
|
"loss": 0.0551, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.09128458086946187, |
|
"grad_norm": 0.38870593905448914, |
|
"learning_rate": 0.0004992790259397178, |
|
"loss": 0.0497, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.09318634297090898, |
|
"grad_norm": 0.361654132604599, |
|
"learning_rate": 0.0004992404974171985, |
|
"loss": 0.0599, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.0950881050723561, |
|
"grad_norm": 0.452127069234848, |
|
"learning_rate": 0.000499200967607802, |
|
"loss": 0.0611, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.09698986717380323, |
|
"grad_norm": 0.5591238737106323, |
|
"learning_rate": 0.0004991604366703332, |
|
"loss": 0.0586, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.09889162927525035, |
|
"grad_norm": 0.40963825583457947, |
|
"learning_rate": 0.0004991189047676192, |
|
"loss": 0.048, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.10079339137669747, |
|
"grad_norm": 0.5524506568908691, |
|
"learning_rate": 0.0004990763720665083, |
|
"loss": 0.0497, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.10269515347814459, |
|
"grad_norm": 0.3613679111003876, |
|
"learning_rate": 0.0004990328387378695, |
|
"loss": 0.052, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.10459691557959172, |
|
"grad_norm": 0.5716497898101807, |
|
"learning_rate": 0.0004989883049565912, |
|
"loss": 0.0512, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.10649867768103884, |
|
"grad_norm": 0.48545169830322266, |
|
"learning_rate": 0.0004989427709015816, |
|
"loss": 0.066, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.10840043978248597, |
|
"grad_norm": 0.3815110921859741, |
|
"learning_rate": 0.0004988962367557668, |
|
"loss": 0.0629, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.11030220188393308, |
|
"grad_norm": 0.40193411707878113, |
|
"learning_rate": 0.000498848702706091, |
|
"loss": 0.068, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.1122039639853802, |
|
"grad_norm": 0.42606788873672485, |
|
"learning_rate": 0.0004988001689435152, |
|
"loss": 0.0478, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.11410572608682733, |
|
"grad_norm": 0.7104797959327698, |
|
"learning_rate": 0.0004987506356630165, |
|
"loss": 0.0607, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.11600748818827444, |
|
"grad_norm": 0.3995862305164337, |
|
"learning_rate": 0.0004987001030635878, |
|
"loss": 0.0565, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.11790925028972157, |
|
"grad_norm": 0.6329849362373352, |
|
"learning_rate": 0.0004986485713482361, |
|
"loss": 0.0733, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.11981101239116869, |
|
"grad_norm": 0.5191243886947632, |
|
"learning_rate": 0.0004985960407239825, |
|
"loss": 0.0565, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.12171277449261582, |
|
"grad_norm": 0.4808266758918762, |
|
"learning_rate": 0.0004985425114018611, |
|
"loss": 0.075, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.12361453659406293, |
|
"grad_norm": 0.49313193559646606, |
|
"learning_rate": 0.000498487983596918, |
|
"loss": 0.0637, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.12551629869551006, |
|
"grad_norm": 0.6945317387580872, |
|
"learning_rate": 0.0004984324575282107, |
|
"loss": 0.0778, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.1274180607969572, |
|
"grad_norm": 0.3684280514717102, |
|
"learning_rate": 0.0004983759334188068, |
|
"loss": 0.0577, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.1293198228984043, |
|
"grad_norm": 0.5096045136451721, |
|
"learning_rate": 0.0004983184114957836, |
|
"loss": 0.073, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.13122158499985143, |
|
"grad_norm": 0.24991731345653534, |
|
"learning_rate": 0.000498259891990227, |
|
"loss": 0.079, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.13312334710129856, |
|
"grad_norm": 0.4371449649333954, |
|
"learning_rate": 0.0004982003751372306, |
|
"loss": 0.0814, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.13502510920274566, |
|
"grad_norm": 0.7052175402641296, |
|
"learning_rate": 0.0004981398611758942, |
|
"loss": 0.0813, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.1369268713041928, |
|
"grad_norm": 0.3389616310596466, |
|
"learning_rate": 0.0004980783503493241, |
|
"loss": 0.0806, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.13882863340563992, |
|
"grad_norm": 0.5502128601074219, |
|
"learning_rate": 0.0004980158429046306, |
|
"loss": 0.0623, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.14073039550708705, |
|
"grad_norm": 0.6836037635803223, |
|
"learning_rate": 0.0004979523390929285, |
|
"loss": 0.0701, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.14263215760853415, |
|
"grad_norm": 0.7100756168365479, |
|
"learning_rate": 0.0004978878391693346, |
|
"loss": 0.0812, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.14453391970998128, |
|
"grad_norm": 0.5102308392524719, |
|
"learning_rate": 0.000497822343392968, |
|
"loss": 0.06, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.1464356818114284, |
|
"grad_norm": 0.4712526202201843, |
|
"learning_rate": 0.0004977558520269484, |
|
"loss": 0.0793, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.1483374439128755, |
|
"grad_norm": 0.7232615947723389, |
|
"learning_rate": 0.0004976883653383948, |
|
"loss": 0.0674, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.15023920601432264, |
|
"grad_norm": 0.5397039651870728, |
|
"learning_rate": 0.0004976198835984253, |
|
"loss": 0.0613, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.15214096811576977, |
|
"grad_norm": 0.5102890133857727, |
|
"learning_rate": 0.0004975504070821548, |
|
"loss": 0.0823, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.1540427302172169, |
|
"grad_norm": 0.5335078239440918, |
|
"learning_rate": 0.0004974799360686952, |
|
"loss": 0.0833, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.155944492318664, |
|
"grad_norm": 0.5386427640914917, |
|
"learning_rate": 0.0004974084708411535, |
|
"loss": 0.0736, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.15784625442011113, |
|
"grad_norm": 0.5704069137573242, |
|
"learning_rate": 0.0004973360116866303, |
|
"loss": 0.0788, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.15974801652155826, |
|
"grad_norm": 0.6669997572898865, |
|
"learning_rate": 0.0004972625588962199, |
|
"loss": 0.0713, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.1616497786230054, |
|
"grad_norm": 0.4686433970928192, |
|
"learning_rate": 0.000497188112765008, |
|
"loss": 0.0768, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.1635515407244525, |
|
"grad_norm": 0.622735321521759, |
|
"learning_rate": 0.0004971126735920707, |
|
"loss": 0.0857, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.16545330282589962, |
|
"grad_norm": 0.4435874819755554, |
|
"learning_rate": 0.0004970362416804739, |
|
"loss": 0.108, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.16735506492734675, |
|
"grad_norm": 0.6319471001625061, |
|
"learning_rate": 0.0004969588173372716, |
|
"loss": 0.1079, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.16925682702879385, |
|
"grad_norm": 0.730019748210907, |
|
"learning_rate": 0.0004968804008735044, |
|
"loss": 0.1124, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.17115858913024098, |
|
"grad_norm": 0.32919904589653015, |
|
"learning_rate": 0.0004968009926041991, |
|
"loss": 0.0877, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.17306035123168811, |
|
"grad_norm": 0.5234276652336121, |
|
"learning_rate": 0.0004967205928483666, |
|
"loss": 0.081, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.17496211333313524, |
|
"grad_norm": 0.5863314270973206, |
|
"learning_rate": 0.000496639201929001, |
|
"loss": 0.0709, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.17686387543458235, |
|
"grad_norm": 0.42288297414779663, |
|
"learning_rate": 0.0004965568201730783, |
|
"loss": 0.0923, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.17876563753602948, |
|
"grad_norm": 0.6558719873428345, |
|
"learning_rate": 0.0004964734479115552, |
|
"loss": 0.0787, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.1806673996374766, |
|
"grad_norm": 0.42140552401542664, |
|
"learning_rate": 0.0004963890854793673, |
|
"loss": 0.0836, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.18256916173892374, |
|
"grad_norm": 0.8134212493896484, |
|
"learning_rate": 0.0004963037332154281, |
|
"loss": 0.0949, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.18447092384037084, |
|
"grad_norm": 0.6248766183853149, |
|
"learning_rate": 0.0004962173914626279, |
|
"loss": 0.1023, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.18637268594181797, |
|
"grad_norm": 0.7062785029411316, |
|
"learning_rate": 0.0004961300605678318, |
|
"loss": 0.0872, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.1882744480432651, |
|
"grad_norm": 0.5721538662910461, |
|
"learning_rate": 0.000496041740881879, |
|
"loss": 0.104, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.1901762101447122, |
|
"grad_norm": 0.6057381629943848, |
|
"learning_rate": 0.0004959524327595805, |
|
"loss": 0.071, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.19207797224615933, |
|
"grad_norm": 0.5917540192604065, |
|
"learning_rate": 0.0004958621365597186, |
|
"loss": 0.0931, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.19397973434760646, |
|
"grad_norm": 0.8036534190177917, |
|
"learning_rate": 0.000495770852645045, |
|
"loss": 0.1035, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.1958814964490536, |
|
"grad_norm": 0.5942894816398621, |
|
"learning_rate": 0.0004956785813822794, |
|
"loss": 0.1072, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.1977832585505007, |
|
"grad_norm": 0.7127736210823059, |
|
"learning_rate": 0.0004955853231421077, |
|
"loss": 0.091, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.19968502065194782, |
|
"grad_norm": 0.507114827632904, |
|
"learning_rate": 0.0004954910782991814, |
|
"loss": 0.0889, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.20158678275339495, |
|
"grad_norm": 0.5299419164657593, |
|
"learning_rate": 0.000495395847232115, |
|
"loss": 0.1045, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.20348854485484205, |
|
"grad_norm": 0.4970870912075043, |
|
"learning_rate": 0.0004952996303234854, |
|
"loss": 0.0869, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.20539030695628918, |
|
"grad_norm": 0.7103607654571533, |
|
"learning_rate": 0.0004952024279598298, |
|
"loss": 0.0953, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.2072920690577363, |
|
"grad_norm": 0.38354578614234924, |
|
"learning_rate": 0.0004951042405316443, |
|
"loss": 0.109, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.20919383115918344, |
|
"grad_norm": 0.7918898463249207, |
|
"learning_rate": 0.0004950050684333823, |
|
"loss": 0.0931, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.21109559326063054, |
|
"grad_norm": 0.37249505519866943, |
|
"learning_rate": 0.0004949049120634532, |
|
"loss": 0.112, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.21299735536207767, |
|
"grad_norm": 0.4019123613834381, |
|
"learning_rate": 0.0004948037718242204, |
|
"loss": 0.1038, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.2148991174635248, |
|
"grad_norm": 0.5207439661026001, |
|
"learning_rate": 0.0004947016481219997, |
|
"loss": 0.1193, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.21680087956497193, |
|
"grad_norm": 0.41174229979515076, |
|
"learning_rate": 0.0004945985413670581, |
|
"loss": 0.0942, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.21870264166641903, |
|
"grad_norm": 0.6560219526290894, |
|
"learning_rate": 0.000494494451973612, |
|
"loss": 0.0643, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.22060440376786616, |
|
"grad_norm": 0.4203394651412964, |
|
"learning_rate": 0.0004943893803598247, |
|
"loss": 0.1187, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.2225061658693133, |
|
"grad_norm": 0.6167390942573547, |
|
"learning_rate": 0.0004942833269478063, |
|
"loss": 0.0941, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.2244079279707604, |
|
"grad_norm": 0.5587171912193298, |
|
"learning_rate": 0.0004941762921636104, |
|
"loss": 0.0665, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.22630969007220753, |
|
"grad_norm": 0.5269094109535217, |
|
"learning_rate": 0.0004940682764372336, |
|
"loss": 0.1143, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.22821145217365466, |
|
"grad_norm": 0.5605872273445129, |
|
"learning_rate": 0.0004939592802026132, |
|
"loss": 0.0913, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.23011321427510179, |
|
"grad_norm": 0.49498802423477173, |
|
"learning_rate": 0.0004938493038976251, |
|
"loss": 0.1008, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.2320149763765489, |
|
"grad_norm": 0.5630708932876587, |
|
"learning_rate": 0.0004937383479640834, |
|
"loss": 0.0885, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.23391673847799602, |
|
"grad_norm": 0.5097892880439758, |
|
"learning_rate": 0.0004936264128477368, |
|
"loss": 0.0914, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.23581850057944315, |
|
"grad_norm": 0.3842105567455292, |
|
"learning_rate": 0.0004935134989982682, |
|
"loss": 0.0792, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.23772026268089025, |
|
"grad_norm": 0.5472444891929626, |
|
"learning_rate": 0.0004933996068692922, |
|
"loss": 0.1069, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.23962202478233738, |
|
"grad_norm": 0.32953816652297974, |
|
"learning_rate": 0.0004932847369183538, |
|
"loss": 0.0745, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.2415237868837845, |
|
"grad_norm": 0.5384835600852966, |
|
"learning_rate": 0.0004931688896069258, |
|
"loss": 0.1093, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.24342554898523164, |
|
"grad_norm": 0.5735477209091187, |
|
"learning_rate": 0.000493052065400408, |
|
"loss": 0.0804, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.24532731108667874, |
|
"grad_norm": 0.47209662199020386, |
|
"learning_rate": 0.000492934264768124, |
|
"loss": 0.0848, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.24722907318812587, |
|
"grad_norm": 0.7762870192527771, |
|
"learning_rate": 0.0004928154881833208, |
|
"loss": 0.1005, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.249130835289573, |
|
"grad_norm": 0.5226859450340271, |
|
"learning_rate": 0.0004926957361231655, |
|
"loss": 0.0874, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.25103259739102013, |
|
"grad_norm": 0.6678869128227234, |
|
"learning_rate": 0.0004925750090687445, |
|
"loss": 0.0833, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.25293435949246723, |
|
"grad_norm": 0.5953888893127441, |
|
"learning_rate": 0.0004924533075050609, |
|
"loss": 0.0894, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.2548361215939144, |
|
"grad_norm": 0.5249069333076477, |
|
"learning_rate": 0.0004923306319210327, |
|
"loss": 0.1101, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.2567378836953615, |
|
"grad_norm": 0.5314027667045593, |
|
"learning_rate": 0.0004922069828094908, |
|
"loss": 0.0916, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.2586396457968086, |
|
"grad_norm": 0.4231284558773041, |
|
"learning_rate": 0.0004920823606671774, |
|
"loss": 0.0772, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.26054140789825575, |
|
"grad_norm": 0.5623658895492554, |
|
"learning_rate": 0.0004919567659947435, |
|
"loss": 0.0908, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.26244316999970285, |
|
"grad_norm": 0.5130127668380737, |
|
"learning_rate": 0.0004918301992967472, |
|
"loss": 0.0994, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.26434493210114995, |
|
"grad_norm": 0.33209264278411865, |
|
"learning_rate": 0.0004917026610816516, |
|
"loss": 0.0935, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.2662466942025971, |
|
"grad_norm": 0.39504528045654297, |
|
"learning_rate": 0.0004915741518618222, |
|
"loss": 0.0931, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.2681484563040442, |
|
"grad_norm": 0.48092228174209595, |
|
"learning_rate": 0.0004914446721535263, |
|
"loss": 0.1158, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.2700502184054913, |
|
"grad_norm": 0.5582765936851501, |
|
"learning_rate": 0.0004913142224769292, |
|
"loss": 0.0828, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.2719519805069385, |
|
"grad_norm": 0.3209240138530731, |
|
"learning_rate": 0.0004911828033560934, |
|
"loss": 0.0905, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.2738537426083856, |
|
"grad_norm": 1.9139939546585083, |
|
"learning_rate": 0.0004910504153189758, |
|
"loss": 0.1058, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.2757555047098327, |
|
"grad_norm": 0.5591135025024414, |
|
"learning_rate": 0.0004909170588974256, |
|
"loss": 0.4107, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.27765726681127983, |
|
"grad_norm": 0.3873596787452698, |
|
"learning_rate": 0.0004907827346271826, |
|
"loss": 0.1475, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.27955902891272694, |
|
"grad_norm": 0.5181601047515869, |
|
"learning_rate": 0.0004906474430478746, |
|
"loss": 0.113, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.2814607910141741, |
|
"grad_norm": 0.3383086025714874, |
|
"learning_rate": 0.0004905111847030159, |
|
"loss": 0.5679, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.2833625531156212, |
|
"grad_norm": 0.49057555198669434, |
|
"learning_rate": 0.0004903739601400039, |
|
"loss": 0.1181, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.2852643152170683, |
|
"grad_norm": 0.572108268737793, |
|
"learning_rate": 0.0004902357699101182, |
|
"loss": 0.1126, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.28716607731851546, |
|
"grad_norm": 0.40325993299484253, |
|
"learning_rate": 0.0004900966145685176, |
|
"loss": 0.0911, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.28906783941996256, |
|
"grad_norm": 0.40683841705322266, |
|
"learning_rate": 0.000489956494674238, |
|
"loss": 0.1002, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.29096960152140966, |
|
"grad_norm": 0.5523852705955505, |
|
"learning_rate": 0.0004898154107901905, |
|
"loss": 0.1002, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.2928713636228568, |
|
"grad_norm": 0.5049010515213013, |
|
"learning_rate": 0.0004896733634831589, |
|
"loss": 0.1015, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.2947731257243039, |
|
"grad_norm": 0.3494808077812195, |
|
"learning_rate": 0.0004895303533237969, |
|
"loss": 0.2635, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.296674887825751, |
|
"grad_norm": 0.5694501399993896, |
|
"learning_rate": 0.000489386380886627, |
|
"loss": 0.1048, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.2985766499271982, |
|
"grad_norm": 0.4427148997783661, |
|
"learning_rate": 0.0004892414467500371, |
|
"loss": 0.1254, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.3004784120286453, |
|
"grad_norm": 0.28379982709884644, |
|
"learning_rate": 0.0004890955514962786, |
|
"loss": 0.2777, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.30238017413009244, |
|
"grad_norm": 0.3206646740436554, |
|
"learning_rate": 0.0004889486957114642, |
|
"loss": 0.1164, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.30428193623153954, |
|
"grad_norm": 0.5560258030891418, |
|
"learning_rate": 0.0004888008799855655, |
|
"loss": 0.0796, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.30618369833298664, |
|
"grad_norm": 0.43756505846977234, |
|
"learning_rate": 0.00048865210491241, |
|
"loss": 0.1102, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.3080854604344338, |
|
"grad_norm": 0.4390459954738617, |
|
"learning_rate": 0.0004885023710896799, |
|
"loss": 0.1831, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.3099872225358809, |
|
"grad_norm": 0.3506847620010376, |
|
"learning_rate": 0.0004883516791189084, |
|
"loss": 0.1157, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.311888984637328, |
|
"grad_norm": 0.4262866675853729, |
|
"learning_rate": 0.00048820002960547844, |
|
"loss": 0.1143, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.31379074673877516, |
|
"grad_norm": 0.5669584274291992, |
|
"learning_rate": 0.0004880474231586195, |
|
"loss": 0.1037, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.31569250884022226, |
|
"grad_norm": 0.5108714699745178, |
|
"learning_rate": 0.00048789386039140535, |
|
"loss": 1.2545, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.31759427094166937, |
|
"grad_norm": 0.5437518358230591, |
|
"learning_rate": 0.00048773934192075186, |
|
"loss": 0.097, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.3194960330431165, |
|
"grad_norm": 0.7191082835197449, |
|
"learning_rate": 0.0004875838683674141, |
|
"loss": 0.0965, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.3213977951445636, |
|
"grad_norm": 0.5136532783508301, |
|
"learning_rate": 0.00048742744035598407, |
|
"loss": 0.0818, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.3232995572460108, |
|
"grad_norm": 0.5209280848503113, |
|
"learning_rate": 0.0004872700585148882, |
|
"loss": 0.0967, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.3252013193474579, |
|
"grad_norm": 0.36031627655029297, |
|
"learning_rate": 0.00048711172347638484, |
|
"loss": 0.113, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.327103081448905, |
|
"grad_norm": 0.39448150992393494, |
|
"learning_rate": 0.0004869524358765616, |
|
"loss": 0.0733, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.32900484355035214, |
|
"grad_norm": 0.4772893786430359, |
|
"learning_rate": 0.00048679219635533276, |
|
"loss": 0.0937, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.33090660565179925, |
|
"grad_norm": 0.4700233042240143, |
|
"learning_rate": 0.0004866310055564371, |
|
"loss": 0.0874, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.33280836775324635, |
|
"grad_norm": 0.444872111082077, |
|
"learning_rate": 0.00048646886412743475, |
|
"loss": 0.0994, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.3347101298546935, |
|
"grad_norm": 0.28628623485565186, |
|
"learning_rate": 0.0004863057727197049, |
|
"loss": 0.097, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.3366118919561406, |
|
"grad_norm": 0.3799450695514679, |
|
"learning_rate": 0.0004861417319884434, |
|
"loss": 0.0807, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.3385136540575877, |
|
"grad_norm": 0.34396281838417053, |
|
"learning_rate": 0.00048597674259265934, |
|
"loss": 0.0895, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.34041541615903487, |
|
"grad_norm": 0.3908982276916504, |
|
"learning_rate": 0.0004858108051951735, |
|
"loss": 0.0872, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.34231717826048197, |
|
"grad_norm": 0.35899925231933594, |
|
"learning_rate": 0.0004856439204626147, |
|
"loss": 0.0699, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.3442189403619291, |
|
"grad_norm": 0.23835837841033936, |
|
"learning_rate": 0.00048547608906541784, |
|
"loss": 0.0763, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.34612070246337623, |
|
"grad_norm": 0.3790901303291321, |
|
"learning_rate": 0.0004853073116778207, |
|
"loss": 0.1081, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.34802246456482333, |
|
"grad_norm": 0.4319717288017273, |
|
"learning_rate": 0.0004851375889778614, |
|
"loss": 0.0859, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.3499242266662705, |
|
"grad_norm": 0.3756701648235321, |
|
"learning_rate": 0.00048496692164737596, |
|
"loss": 0.1024, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.3518259887677176, |
|
"grad_norm": 0.1727105975151062, |
|
"learning_rate": 0.0004847953103719951, |
|
"loss": 0.094, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.3537277508691647, |
|
"grad_norm": 0.49954915046691895, |
|
"learning_rate": 0.0004846227558411417, |
|
"loss": 0.0923, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.35562951297061185, |
|
"grad_norm": 0.3634874224662781, |
|
"learning_rate": 0.0004844492587480283, |
|
"loss": 0.0742, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.35753127507205895, |
|
"grad_norm": 0.3322870433330536, |
|
"learning_rate": 0.0004842748197896537, |
|
"loss": 0.0939, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.35943303717350605, |
|
"grad_norm": 0.22122009098529816, |
|
"learning_rate": 0.00048409943966680057, |
|
"loss": 0.0669, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.3613347992749532, |
|
"grad_norm": 0.5171657204627991, |
|
"learning_rate": 0.00048392311908403276, |
|
"loss": 0.0649, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.3632365613764003, |
|
"grad_norm": 0.45435401797294617, |
|
"learning_rate": 0.0004837458587496921, |
|
"loss": 0.0841, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.36513832347784747, |
|
"grad_norm": 0.35136786103248596, |
|
"learning_rate": 0.00048356765937589597, |
|
"loss": 0.0915, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.3670400855792946, |
|
"grad_norm": 0.5352773666381836, |
|
"learning_rate": 0.0004833885216785338, |
|
"loss": 0.0622, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.3689418476807417, |
|
"grad_norm": 0.3906407654285431, |
|
"learning_rate": 0.0004832084463772649, |
|
"loss": 0.0739, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.37084360978218883, |
|
"grad_norm": 0.23543521761894226, |
|
"learning_rate": 0.0004830274341955152, |
|
"loss": 0.0766, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.37274537188363593, |
|
"grad_norm": 0.31237101554870605, |
|
"learning_rate": 0.0004828454858604744, |
|
"loss": 0.0668, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.37464713398508304, |
|
"grad_norm": 0.4033367931842804, |
|
"learning_rate": 0.0004826626021030931, |
|
"loss": 0.0822, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.3765488960865302, |
|
"grad_norm": 0.3153851628303528, |
|
"learning_rate": 0.0004824787836580797, |
|
"loss": 0.0506, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.3784506581879773, |
|
"grad_norm": 0.37774601578712463, |
|
"learning_rate": 0.0004822940312638977, |
|
"loss": 0.0885, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.3803524202894244, |
|
"grad_norm": 0.3738980293273926, |
|
"learning_rate": 0.0004821083456627625, |
|
"loss": 0.0705, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.38225418239087156, |
|
"grad_norm": 0.3323149085044861, |
|
"learning_rate": 0.00048192172760063866, |
|
"loss": 0.0765, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.38415594449231866, |
|
"grad_norm": 0.38756170868873596, |
|
"learning_rate": 0.0004817341778272366, |
|
"loss": 0.0826, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.38605770659376576, |
|
"grad_norm": 0.27625781297683716, |
|
"learning_rate": 0.0004815456970960098, |
|
"loss": 0.0931, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.3879594686952129, |
|
"grad_norm": 0.28700143098831177, |
|
"learning_rate": 0.00048135628616415184, |
|
"loss": 0.0782, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.38986123079666, |
|
"grad_norm": 0.3487882912158966, |
|
"learning_rate": 0.0004811659457925931, |
|
"loss": 0.0834, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.3917629928981072, |
|
"grad_norm": 0.3156881630420685, |
|
"learning_rate": 0.00048097467674599795, |
|
"loss": 0.076, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.3936647549995543, |
|
"grad_norm": 0.28736022114753723, |
|
"learning_rate": 0.0004807824797927615, |
|
"loss": 0.0656, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.3955665171010014, |
|
"grad_norm": 0.370560884475708, |
|
"learning_rate": 0.0004805893557050065, |
|
"loss": 0.0685, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.39746827920244854, |
|
"grad_norm": 0.32262367010116577, |
|
"learning_rate": 0.00048039530525858067, |
|
"loss": 0.0669, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.39937004130389564, |
|
"grad_norm": 0.21956555545330048, |
|
"learning_rate": 0.00048020032923305284, |
|
"loss": 0.0827, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.40127180340534274, |
|
"grad_norm": 0.295251727104187, |
|
"learning_rate": 0.0004800044284117104, |
|
"loss": 0.0557, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.4031735655067899, |
|
"grad_norm": 1.090646505355835, |
|
"learning_rate": 0.00047980760358155616, |
|
"loss": 0.0861, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.405075327608237, |
|
"grad_norm": 0.44668304920196533, |
|
"learning_rate": 0.0004796098555333045, |
|
"loss": 0.0726, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.4069770897096841, |
|
"grad_norm": 0.3137907385826111, |
|
"learning_rate": 0.00047941118506137915, |
|
"loss": 0.0894, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.40887885181113126, |
|
"grad_norm": 0.36221790313720703, |
|
"learning_rate": 0.0004792115929639094, |
|
"loss": 0.0716, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.41078061391257836, |
|
"grad_norm": 0.53058922290802, |
|
"learning_rate": 0.000479011080042727, |
|
"loss": 0.0684, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.4126823760140255, |
|
"grad_norm": 0.23870202898979187, |
|
"learning_rate": 0.0004788096471033629, |
|
"loss": 0.0834, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.4145841381154726, |
|
"grad_norm": 0.27887409925460815, |
|
"learning_rate": 0.0004786072949550443, |
|
"loss": 0.0614, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.4164859002169197, |
|
"grad_norm": 0.5123117566108704, |
|
"learning_rate": 0.0004784040244106909, |
|
"loss": 0.0667, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.4183876623183669, |
|
"grad_norm": 0.2506055533885956, |
|
"learning_rate": 0.0004781998362869123, |
|
"loss": 0.0651, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.420289424419814, |
|
"grad_norm": 0.48434486985206604, |
|
"learning_rate": 0.0004779947314040039, |
|
"loss": 0.0663, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.4221911865212611, |
|
"grad_norm": 0.3052046597003937, |
|
"learning_rate": 0.0004777887105859444, |
|
"loss": 0.0866, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.42409294862270824, |
|
"grad_norm": 0.4667761027812958, |
|
"learning_rate": 0.00047758177466039197, |
|
"loss": 0.1044, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.42599471072415535, |
|
"grad_norm": 0.25950753688812256, |
|
"learning_rate": 0.0004773739244586812, |
|
"loss": 0.0998, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.42789647282560245, |
|
"grad_norm": 0.38734811544418335, |
|
"learning_rate": 0.0004771651608158194, |
|
"loss": 0.0791, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.4297982349270496, |
|
"grad_norm": 0.5071670413017273, |
|
"learning_rate": 0.0004769554845704838, |
|
"loss": 0.072, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.4316999970284967, |
|
"grad_norm": 0.32346102595329285, |
|
"learning_rate": 0.00047674489656501773, |
|
"loss": 0.0563, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.43360175912994386, |
|
"grad_norm": 0.20895633101463318, |
|
"learning_rate": 0.0004765333976454273, |
|
"loss": 0.0642, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.43550352123139097, |
|
"grad_norm": 0.3482305407524109, |
|
"learning_rate": 0.00047632098866137826, |
|
"loss": 0.0716, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.43740528333283807, |
|
"grad_norm": 0.2852407991886139, |
|
"learning_rate": 0.00047610767046619225, |
|
"loss": 0.0745, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.4393070454342852, |
|
"grad_norm": 0.45966169238090515, |
|
"learning_rate": 0.0004758934439168436, |
|
"loss": 0.0741, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.44120880753573233, |
|
"grad_norm": 0.21411827206611633, |
|
"learning_rate": 0.00047567830987395597, |
|
"loss": 0.0598, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.44311056963717943, |
|
"grad_norm": 0.36762118339538574, |
|
"learning_rate": 0.0004754622692017985, |
|
"loss": 0.0837, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.4450123317386266, |
|
"grad_norm": 0.2668420076370239, |
|
"learning_rate": 0.0004752453227682827, |
|
"loss": 0.0745, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.4469140938400737, |
|
"grad_norm": 0.3845922350883484, |
|
"learning_rate": 0.00047502747144495875, |
|
"loss": 0.0747, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.4488158559415208, |
|
"grad_norm": 0.38041210174560547, |
|
"learning_rate": 0.00047480871610701213, |
|
"loss": 0.0743, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.45071761804296795, |
|
"grad_norm": 0.25837719440460205, |
|
"learning_rate": 0.00047458905763326023, |
|
"loss": 0.0689, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.45261938014441505, |
|
"grad_norm": 0.32403454184532166, |
|
"learning_rate": 0.0004743684969061484, |
|
"loss": 0.0669, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.4545211422458622, |
|
"grad_norm": 0.4562000334262848, |
|
"learning_rate": 0.00047414703481174687, |
|
"loss": 0.0888, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.4564229043473093, |
|
"grad_norm": 0.22524714469909668, |
|
"learning_rate": 0.00047392467223974686, |
|
"loss": 0.0748, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.4583246664487564, |
|
"grad_norm": 0.33924999833106995, |
|
"learning_rate": 0.0004737014100834571, |
|
"loss": 0.0757, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.46022642855020357, |
|
"grad_norm": 0.26549142599105835, |
|
"learning_rate": 0.0004734772492398003, |
|
"loss": 0.0695, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.46212819065165067, |
|
"grad_norm": 0.4339611530303955, |
|
"learning_rate": 0.0004732521906093097, |
|
"loss": 0.0868, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.4640299527530978, |
|
"grad_norm": 0.3820763826370239, |
|
"learning_rate": 0.00047302623509612484, |
|
"loss": 0.0687, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.46593171485454493, |
|
"grad_norm": 0.2838119864463806, |
|
"learning_rate": 0.00047279938360798884, |
|
"loss": 0.0636, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.46783347695599203, |
|
"grad_norm": 0.26066961884498596, |
|
"learning_rate": 0.00047257163705624394, |
|
"loss": 0.0555, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.46973523905743914, |
|
"grad_norm": 0.23981156945228577, |
|
"learning_rate": 0.00047234299635582835, |
|
"loss": 0.0606, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.4716370011588863, |
|
"grad_norm": 0.3982088267803192, |
|
"learning_rate": 0.0004721134624252722, |
|
"loss": 0.0631, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.4735387632603334, |
|
"grad_norm": 0.24350067973136902, |
|
"learning_rate": 0.00047188303618669414, |
|
"loss": 0.0664, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.4754405253617805, |
|
"grad_norm": 0.2303943932056427, |
|
"learning_rate": 0.0004716517185657977, |
|
"loss": 0.0665, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.47734228746322765, |
|
"grad_norm": 0.23348209261894226, |
|
"learning_rate": 0.00047141951049186703, |
|
"loss": 0.0572, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.47924404956467476, |
|
"grad_norm": 0.24423716962337494, |
|
"learning_rate": 0.00047118641289776395, |
|
"loss": 0.099, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.4811458116661219, |
|
"grad_norm": 0.371750146150589, |
|
"learning_rate": 0.00047095242671992346, |
|
"loss": 0.064, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.483047573767569, |
|
"grad_norm": 0.45275604724884033, |
|
"learning_rate": 0.0004707175528983506, |
|
"loss": 0.0745, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.4849493358690161, |
|
"grad_norm": 0.23620209097862244, |
|
"learning_rate": 0.00047048179237661617, |
|
"loss": 0.0594, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.4868510979704633, |
|
"grad_norm": 0.13869664072990417, |
|
"learning_rate": 0.00047024514610185316, |
|
"loss": 0.0575, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.4887528600719104, |
|
"grad_norm": 0.17631912231445312, |
|
"learning_rate": 0.00047000761502475317, |
|
"loss": 0.0434, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.4906546221733575, |
|
"grad_norm": 0.30484968423843384, |
|
"learning_rate": 0.0004697692000995621, |
|
"loss": 0.0746, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.49255638427480464, |
|
"grad_norm": 0.24606972932815552, |
|
"learning_rate": 0.0004695299022840768, |
|
"loss": 0.0639, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.49445814637625174, |
|
"grad_norm": 0.36455589532852173, |
|
"learning_rate": 0.00046928972253964087, |
|
"loss": 0.0677, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.49635990847769884, |
|
"grad_norm": 0.20996150374412537, |
|
"learning_rate": 0.0004690486618311408, |
|
"loss": 0.0648, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.498261670579146, |
|
"grad_norm": 0.12294875085353851, |
|
"learning_rate": 0.0004688067211270025, |
|
"loss": 0.0561, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.5001634326805932, |
|
"grad_norm": 0.3342492878437042, |
|
"learning_rate": 0.00046856390139918703, |
|
"loss": 0.0627, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.5020651947820403, |
|
"grad_norm": 0.28927573561668396, |
|
"learning_rate": 0.00046832020362318677, |
|
"loss": 0.0947, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.5039669568834874, |
|
"grad_norm": 0.2924613952636719, |
|
"learning_rate": 0.00046807562877802144, |
|
"loss": 0.036, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.5058687189849345, |
|
"grad_norm": 0.22599062323570251, |
|
"learning_rate": 0.0004678301778462344, |
|
"loss": 0.0671, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.5077704810863816, |
|
"grad_norm": 0.40285536646842957, |
|
"learning_rate": 0.0004675838518138885, |
|
"loss": 0.0557, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.5096722431878288, |
|
"grad_norm": 0.35334137082099915, |
|
"learning_rate": 0.0004673366516705623, |
|
"loss": 0.0639, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.5115740052892759, |
|
"grad_norm": 0.1980566680431366, |
|
"learning_rate": 0.00046708857840934564, |
|
"loss": 0.0539, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.513475767390723, |
|
"grad_norm": 0.23285962641239166, |
|
"learning_rate": 0.0004668396330268364, |
|
"loss": 0.0646, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.5153775294921701, |
|
"grad_norm": 0.23495745658874512, |
|
"learning_rate": 0.00046658981652313573, |
|
"loss": 0.0585, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.5172792915936172, |
|
"grad_norm": 0.18733032047748566, |
|
"learning_rate": 0.00046633912990184457, |
|
"loss": 0.0551, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.5191810536950643, |
|
"grad_norm": 0.23610015213489532, |
|
"learning_rate": 0.00046608757417005944, |
|
"loss": 0.0543, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.5210828157965115, |
|
"grad_norm": 0.35595059394836426, |
|
"learning_rate": 0.00046583515033836833, |
|
"loss": 0.0633, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.5229845778979586, |
|
"grad_norm": 0.23900267481803894, |
|
"learning_rate": 0.00046558185942084657, |
|
"loss": 0.0483, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.5248863399994057, |
|
"grad_norm": 0.362981915473938, |
|
"learning_rate": 0.00046532770243505313, |
|
"loss": 0.0462, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.5267881021008528, |
|
"grad_norm": 0.5131031274795532, |
|
"learning_rate": 0.0004650726804020261, |
|
"loss": 0.055, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.5286898642022999, |
|
"grad_norm": 0.22358974814414978, |
|
"learning_rate": 0.00046481679434627886, |
|
"loss": 0.0505, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.530591626303747, |
|
"grad_norm": 0.4018985331058502, |
|
"learning_rate": 0.00046456004529579574, |
|
"loss": 0.065, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.5324933884051942, |
|
"grad_norm": 0.14427751302719116, |
|
"learning_rate": 0.00046430243428202824, |
|
"loss": 0.0566, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.5343951505066413, |
|
"grad_norm": 0.2335805743932724, |
|
"learning_rate": 0.00046404396233989053, |
|
"loss": 0.0582, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.5362969126080884, |
|
"grad_norm": 0.20551510155200958, |
|
"learning_rate": 0.00046378463050775534, |
|
"loss": 0.069, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.5381986747095355, |
|
"grad_norm": 0.2913135588169098, |
|
"learning_rate": 0.0004635244398274501, |
|
"loss": 0.0787, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.5401004368109826, |
|
"grad_norm": 0.25064730644226074, |
|
"learning_rate": 0.0004632633913442524, |
|
"loss": 0.0473, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.5420021989124298, |
|
"grad_norm": 0.24504418671131134, |
|
"learning_rate": 0.0004630014861068861, |
|
"loss": 0.0724, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.543903961013877, |
|
"grad_norm": 0.14822843670845032, |
|
"learning_rate": 0.00046273872516751645, |
|
"loss": 0.057, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.545805723115324, |
|
"grad_norm": 0.3440416753292084, |
|
"learning_rate": 0.0004624751095817471, |
|
"loss": 0.0518, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.5477074852167712, |
|
"grad_norm": 0.3021232485771179, |
|
"learning_rate": 0.0004622106404086144, |
|
"loss": 0.0353, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.5496092473182183, |
|
"grad_norm": 0.27382078766822815, |
|
"learning_rate": 0.00046194531871058435, |
|
"loss": 0.0517, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.5515110094196654, |
|
"grad_norm": 0.2883986830711365, |
|
"learning_rate": 0.0004616791455535477, |
|
"loss": 0.0526, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.5534127715211126, |
|
"grad_norm": 0.1981579065322876, |
|
"learning_rate": 0.0004614121220068157, |
|
"loss": 0.0441, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.5553145336225597, |
|
"grad_norm": 0.133266881108284, |
|
"learning_rate": 0.000461144249143116, |
|
"loss": 0.0529, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.5572162957240068, |
|
"grad_norm": 0.2734558880329132, |
|
"learning_rate": 0.0004608755280385883, |
|
"loss": 0.0482, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.5591180578254539, |
|
"grad_norm": 0.1929953396320343, |
|
"learning_rate": 0.00046060595977277997, |
|
"loss": 0.0545, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.561019819926901, |
|
"grad_norm": 0.34874358773231506, |
|
"learning_rate": 0.00046033554542864157, |
|
"loss": 0.0563, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.5629215820283482, |
|
"grad_norm": 0.2735145390033722, |
|
"learning_rate": 0.00046006428609252293, |
|
"loss": 0.0534, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.5648233441297953, |
|
"grad_norm": 0.1838793009519577, |
|
"learning_rate": 0.0004597921828541682, |
|
"loss": 0.04, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.5667251062312424, |
|
"grad_norm": 0.2556362748146057, |
|
"learning_rate": 0.00045951923680671213, |
|
"loss": 0.0659, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.5686268683326895, |
|
"grad_norm": 0.2070973515510559, |
|
"learning_rate": 0.000459245449046675, |
|
"loss": 0.0449, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.5705286304341366, |
|
"grad_norm": 0.31435173749923706, |
|
"learning_rate": 0.0004589708206739587, |
|
"loss": 0.0668, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.5724303925355837, |
|
"grad_norm": 0.2508755922317505, |
|
"learning_rate": 0.0004586953527918422, |
|
"loss": 0.0538, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.5743321546370309, |
|
"grad_norm": 0.11883655935525894, |
|
"learning_rate": 0.000458419046506977, |
|
"loss": 0.0414, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.576233916738478, |
|
"grad_norm": 0.08662758767604828, |
|
"learning_rate": 0.0004581419029293828, |
|
"loss": 0.0317, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.5781356788399251, |
|
"grad_norm": 0.22283309698104858, |
|
"learning_rate": 0.0004578639231724429, |
|
"loss": 0.0392, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.5800374409413722, |
|
"grad_norm": 0.25671476125717163, |
|
"learning_rate": 0.0004575851083529, |
|
"loss": 0.0375, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.5819392030428193, |
|
"grad_norm": 0.2501511871814728, |
|
"learning_rate": 0.0004573054595908514, |
|
"loss": 0.0367, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.5838409651442665, |
|
"grad_norm": 0.196889728307724, |
|
"learning_rate": 0.00045702497800974474, |
|
"loss": 0.0887, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.5857427272457136, |
|
"grad_norm": 0.28852781653404236, |
|
"learning_rate": 0.00045674366473637317, |
|
"loss": 0.0511, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.5876444893471607, |
|
"grad_norm": 0.20641787350177765, |
|
"learning_rate": 0.00045646152090087145, |
|
"loss": 0.0594, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.5895462514486078, |
|
"grad_norm": 0.1833396703004837, |
|
"learning_rate": 0.0004561785476367106, |
|
"loss": 0.0476, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.5914480135500549, |
|
"grad_norm": 0.3527635335922241, |
|
"learning_rate": 0.0004558947460806939, |
|
"loss": 0.0429, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.593349775651502, |
|
"grad_norm": 0.2580285668373108, |
|
"learning_rate": 0.00045561011737295235, |
|
"loss": 0.0307, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.5952515377529493, |
|
"grad_norm": 0.21790006756782532, |
|
"learning_rate": 0.0004553246626569395, |
|
"loss": 0.0314, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.5971532998543964, |
|
"grad_norm": 0.30504387617111206, |
|
"learning_rate": 0.00045503838307942756, |
|
"loss": 0.0398, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.5990550619558435, |
|
"grad_norm": 0.24918900430202484, |
|
"learning_rate": 0.00045475127979050254, |
|
"loss": 0.0473, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.6009568240572906, |
|
"grad_norm": 0.30492204427719116, |
|
"learning_rate": 0.00045446335394355947, |
|
"loss": 0.0536, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.6028585861587377, |
|
"grad_norm": 0.23205405473709106, |
|
"learning_rate": 0.0004541746066952978, |
|
"loss": 0.0219, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.6047603482601849, |
|
"grad_norm": 0.25788795948028564, |
|
"learning_rate": 0.000453885039205717, |
|
"loss": 0.0458, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.606662110361632, |
|
"grad_norm": 0.14208689332008362, |
|
"learning_rate": 0.0004535946526381117, |
|
"loss": 0.0583, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.6085638724630791, |
|
"grad_norm": 0.3792324364185333, |
|
"learning_rate": 0.0004533034481590671, |
|
"loss": 0.0543, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.6104656345645262, |
|
"grad_norm": 0.11220109462738037, |
|
"learning_rate": 0.00045301142693845406, |
|
"loss": 0.06, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.6123673966659733, |
|
"grad_norm": 0.37494245171546936, |
|
"learning_rate": 0.0004527185901494247, |
|
"loss": 0.0333, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.6142691587674204, |
|
"grad_norm": 0.181362122297287, |
|
"learning_rate": 0.0004524249389684075, |
|
"loss": 0.0413, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.6161709208688676, |
|
"grad_norm": 0.1204942986369133, |
|
"learning_rate": 0.0004521304745751029, |
|
"loss": 0.0606, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.6180726829703147, |
|
"grad_norm": 0.28631624579429626, |
|
"learning_rate": 0.00045183519815247803, |
|
"loss": 0.0511, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.6199744450717618, |
|
"grad_norm": 0.3006780743598938, |
|
"learning_rate": 0.00045153911088676216, |
|
"loss": 0.0603, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.6218762071732089, |
|
"grad_norm": 0.20401684939861298, |
|
"learning_rate": 0.00045124221396744226, |
|
"loss": 0.0462, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.623777969274656, |
|
"grad_norm": 0.12001892924308777, |
|
"learning_rate": 0.00045094450858725775, |
|
"loss": 0.0624, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.6256797313761032, |
|
"grad_norm": 0.15589171648025513, |
|
"learning_rate": 0.0004506459959421962, |
|
"loss": 0.0518, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.6275814934775503, |
|
"grad_norm": 0.24381360411643982, |
|
"learning_rate": 0.0004503466772314878, |
|
"loss": 0.0516, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.6294832555789974, |
|
"grad_norm": 0.24374118447303772, |
|
"learning_rate": 0.0004500465536576015, |
|
"loss": 0.0428, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.6313850176804445, |
|
"grad_norm": 0.25958311557769775, |
|
"learning_rate": 0.00044974562642623926, |
|
"loss": 0.0385, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.6332867797818916, |
|
"grad_norm": 0.20062792301177979, |
|
"learning_rate": 0.0004494438967463318, |
|
"loss": 0.0327, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.6351885418833387, |
|
"grad_norm": 0.21975015103816986, |
|
"learning_rate": 0.0004491413658300336, |
|
"loss": 0.0476, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.6370903039847859, |
|
"grad_norm": 0.2386341094970703, |
|
"learning_rate": 0.00044883803489271785, |
|
"loss": 0.052, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.638992066086233, |
|
"grad_norm": 0.24763132631778717, |
|
"learning_rate": 0.00044853390515297176, |
|
"loss": 0.0552, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.6408938281876801, |
|
"grad_norm": 0.24076853692531586, |
|
"learning_rate": 0.00044822897783259176, |
|
"loss": 0.0456, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.6427955902891272, |
|
"grad_norm": 0.22434252500534058, |
|
"learning_rate": 0.0004479232541565782, |
|
"loss": 0.0467, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.6446973523905744, |
|
"grad_norm": 0.12866199016571045, |
|
"learning_rate": 0.00044761673535313084, |
|
"loss": 0.0454, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.6465991144920216, |
|
"grad_norm": 0.2736774682998657, |
|
"learning_rate": 0.0004473094226536436, |
|
"loss": 0.0451, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.6485008765934687, |
|
"grad_norm": 0.29224392771720886, |
|
"learning_rate": 0.0004470013172926999, |
|
"loss": 0.0508, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.6504026386949158, |
|
"grad_norm": 0.23447951674461365, |
|
"learning_rate": 0.00044669242050806766, |
|
"loss": 0.0479, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.6523044007963629, |
|
"grad_norm": 0.2928425967693329, |
|
"learning_rate": 0.0004463827335406938, |
|
"loss": 0.0266, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.65420616289781, |
|
"grad_norm": 0.282113641500473, |
|
"learning_rate": 0.0004460722576347002, |
|
"loss": 0.0442, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.6561079249992571, |
|
"grad_norm": 0.24653756618499756, |
|
"learning_rate": 0.0004457609940373777, |
|
"loss": 0.0392, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.6580096871007043, |
|
"grad_norm": 0.2997114658355713, |
|
"learning_rate": 0.00044544894399918214, |
|
"loss": 0.0477, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.6599114492021514, |
|
"grad_norm": 0.08997475355863571, |
|
"learning_rate": 0.00044513610877372814, |
|
"loss": 0.0424, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.6618132113035985, |
|
"grad_norm": 0.18397915363311768, |
|
"learning_rate": 0.00044482248961778516, |
|
"loss": 0.0339, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.6637149734050456, |
|
"grad_norm": 0.18064412474632263, |
|
"learning_rate": 0.00044450808779127185, |
|
"loss": 0.0437, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.6656167355064927, |
|
"grad_norm": 0.18558929860591888, |
|
"learning_rate": 0.00044419290455725103, |
|
"loss": 0.0554, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.6675184976079399, |
|
"grad_norm": 0.3115510940551758, |
|
"learning_rate": 0.00044387694118192477, |
|
"loss": 0.0458, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.669420259709387, |
|
"grad_norm": 0.4627574384212494, |
|
"learning_rate": 0.0004435601989346293, |
|
"loss": 0.0463, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.6713220218108341, |
|
"grad_norm": 0.17065434157848358, |
|
"learning_rate": 0.0004432426790878298, |
|
"loss": 0.0457, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.6732237839122812, |
|
"grad_norm": 0.3017977178096771, |
|
"learning_rate": 0.00044292438291711536, |
|
"loss": 0.0452, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.6751255460137283, |
|
"grad_norm": 0.21006427705287933, |
|
"learning_rate": 0.00044260531170119377, |
|
"loss": 0.041, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.6770273081151754, |
|
"grad_norm": 0.14174698293209076, |
|
"learning_rate": 0.0004422854667218865, |
|
"loss": 0.0421, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.6789290702166226, |
|
"grad_norm": 0.21689032018184662, |
|
"learning_rate": 0.00044196484926412364, |
|
"loss": 0.0426, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.6808308323180697, |
|
"grad_norm": 0.25217998027801514, |
|
"learning_rate": 0.00044164346061593827, |
|
"loss": 0.0616, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.6827325944195168, |
|
"grad_norm": 0.07392685115337372, |
|
"learning_rate": 0.0004413213020684619, |
|
"loss": 0.0366, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.6846343565209639, |
|
"grad_norm": 0.2156752347946167, |
|
"learning_rate": 0.0004409983749159189, |
|
"loss": 0.0352, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.686536118622411, |
|
"grad_norm": 0.007870941422879696, |
|
"learning_rate": 0.0004406746804556214, |
|
"loss": 0.0342, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.6884378807238583, |
|
"grad_norm": 0.18450793623924255, |
|
"learning_rate": 0.000440350219987964, |
|
"loss": 0.0292, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.6903396428253054, |
|
"grad_norm": 0.3035867214202881, |
|
"learning_rate": 0.00044002499481641876, |
|
"loss": 0.0439, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.6922414049267525, |
|
"grad_norm": 0.16944675147533417, |
|
"learning_rate": 0.0004396990062475299, |
|
"loss": 0.0368, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.6941431670281996, |
|
"grad_norm": 0.17465433478355408, |
|
"learning_rate": 0.0004393722555909081, |
|
"loss": 0.0512, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.6960449291296467, |
|
"grad_norm": 0.4197070002555847, |
|
"learning_rate": 0.0004390447441592259, |
|
"loss": 0.0494, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.6979466912310938, |
|
"grad_norm": 0.21269617974758148, |
|
"learning_rate": 0.0004387164732682122, |
|
"loss": 0.0432, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.699848453332541, |
|
"grad_norm": 0.3487708270549774, |
|
"learning_rate": 0.0004383874442366468, |
|
"loss": 0.0405, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.7017502154339881, |
|
"grad_norm": 0.3449212908744812, |
|
"learning_rate": 0.0004380576583863551, |
|
"loss": 0.0604, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.7036519775354352, |
|
"grad_norm": 0.1568797379732132, |
|
"learning_rate": 0.0004377271170422031, |
|
"loss": 0.0493, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.7055537396368823, |
|
"grad_norm": 0.18511539697647095, |
|
"learning_rate": 0.0004373958215320918, |
|
"loss": 0.0313, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.7074555017383294, |
|
"grad_norm": 0.12269023805856705, |
|
"learning_rate": 0.0004370637731869521, |
|
"loss": 0.0343, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.7093572638397766, |
|
"grad_norm": 0.1673561930656433, |
|
"learning_rate": 0.000436730973340739, |
|
"loss": 0.033, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.7112590259412237, |
|
"grad_norm": 0.3525213301181793, |
|
"learning_rate": 0.00043639742333042675, |
|
"loss": 0.1303, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.7131607880426708, |
|
"grad_norm": 0.33365005254745483, |
|
"learning_rate": 0.00043606312449600334, |
|
"loss": 0.1418, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.7150625501441179, |
|
"grad_norm": 0.3005249500274658, |
|
"learning_rate": 0.00043572807818046484, |
|
"loss": 0.0664, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.716964312245565, |
|
"grad_norm": 0.4880094826221466, |
|
"learning_rate": 0.00043539228572981036, |
|
"loss": 0.0788, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.7188660743470121, |
|
"grad_norm": 0.7306382656097412, |
|
"learning_rate": 0.00043505574849303654, |
|
"loss": 0.1543, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.7207678364484593, |
|
"grad_norm": 0.3662378489971161, |
|
"learning_rate": 0.00043471846782213184, |
|
"loss": 0.0682, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.7226695985499064, |
|
"grad_norm": 0.4383717477321625, |
|
"learning_rate": 0.0004343804450720717, |
|
"loss": 0.4356, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.7245713606513535, |
|
"grad_norm": 0.24195566773414612, |
|
"learning_rate": 0.0004340416816008125, |
|
"loss": 0.058, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.7264731227528006, |
|
"grad_norm": 0.746701717376709, |
|
"learning_rate": 0.00043370217876928643, |
|
"loss": 0.137, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.7283748848542477, |
|
"grad_norm": 0.33316388726234436, |
|
"learning_rate": 0.000433361937941396, |
|
"loss": 0.0669, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.7302766469556949, |
|
"grad_norm": 0.5440863370895386, |
|
"learning_rate": 0.00043302096048400846, |
|
"loss": 0.0573, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.732178409057142, |
|
"grad_norm": 0.20268595218658447, |
|
"learning_rate": 0.00043267924776695034, |
|
"loss": 0.0647, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.7340801711585891, |
|
"grad_norm": 0.12712806463241577, |
|
"learning_rate": 0.0004323368011630021, |
|
"loss": 0.0656, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.7359819332600362, |
|
"grad_norm": 0.25622451305389404, |
|
"learning_rate": 0.00043199362204789224, |
|
"loss": 0.0369, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.7378836953614833, |
|
"grad_norm": 0.48822298645973206, |
|
"learning_rate": 0.0004316497118002922, |
|
"loss": 0.0612, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.7397854574629305, |
|
"grad_norm": 0.6518691778182983, |
|
"learning_rate": 0.0004313050718018107, |
|
"loss": 0.0536, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.7416872195643777, |
|
"grad_norm": 0.4019169509410858, |
|
"learning_rate": 0.0004309597034369878, |
|
"loss": 0.0511, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.7435889816658248, |
|
"grad_norm": 0.3208947479724884, |
|
"learning_rate": 0.00043061360809329007, |
|
"loss": 0.0612, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.7454907437672719, |
|
"grad_norm": 0.6742276549339294, |
|
"learning_rate": 0.0004302667871611045, |
|
"loss": 0.05, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.747392505868719, |
|
"grad_norm": 0.32614463567733765, |
|
"learning_rate": 0.0004299192420337326, |
|
"loss": 0.0508, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.7492942679701661, |
|
"grad_norm": 0.407650887966156, |
|
"learning_rate": 0.0004295709741073859, |
|
"loss": 0.0459, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.7511960300716132, |
|
"grad_norm": 0.11781582981348038, |
|
"learning_rate": 0.00042922198478117927, |
|
"loss": 0.0452, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.7530977921730604, |
|
"grad_norm": 0.10125182569026947, |
|
"learning_rate": 0.0004288722754571257, |
|
"loss": 0.0391, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.7549995542745075, |
|
"grad_norm": 0.19000868499279022, |
|
"learning_rate": 0.00042852184754013075, |
|
"loss": 0.0345, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.7569013163759546, |
|
"grad_norm": 0.00830161478370428, |
|
"learning_rate": 0.00042817070243798686, |
|
"loss": 0.0446, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.7588030784774017, |
|
"grad_norm": 0.37715670466423035, |
|
"learning_rate": 0.0004278188415613675, |
|
"loss": 0.0533, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.7607048405788488, |
|
"grad_norm": 0.02961985021829605, |
|
"learning_rate": 0.000427466266323822, |
|
"loss": 0.0274, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.762606602680296, |
|
"grad_norm": 0.3095683753490448, |
|
"learning_rate": 0.00042711297814176897, |
|
"loss": 0.0333, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.7645083647817431, |
|
"grad_norm": 0.17599205672740936, |
|
"learning_rate": 0.0004267589784344917, |
|
"loss": 0.0354, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.7664101268831902, |
|
"grad_norm": 0.29849332571029663, |
|
"learning_rate": 0.00042640426862413164, |
|
"loss": 0.0388, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.7683118889846373, |
|
"grad_norm": 0.2587386667728424, |
|
"learning_rate": 0.00042604885013568304, |
|
"loss": 0.0535, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.7702136510860844, |
|
"grad_norm": 0.1985943764448166, |
|
"learning_rate": 0.00042569272439698725, |
|
"loss": 0.0357, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.7721154131875315, |
|
"grad_norm": 0.28795111179351807, |
|
"learning_rate": 0.00042533589283872677, |
|
"loss": 0.0437, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.7740171752889787, |
|
"grad_norm": 0.15408733487129211, |
|
"learning_rate": 0.0004249783568944197, |
|
"loss": 0.051, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.7759189373904258, |
|
"grad_norm": 0.35579803586006165, |
|
"learning_rate": 0.00042462011800041376, |
|
"loss": 0.0445, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.7778206994918729, |
|
"grad_norm": 0.1227014809846878, |
|
"learning_rate": 0.0004242611775958809, |
|
"loss": 0.0333, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.77972246159332, |
|
"grad_norm": 0.3193877339363098, |
|
"learning_rate": 0.0004239015371228111, |
|
"loss": 0.0273, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.7816242236947671, |
|
"grad_norm": 0.09365034103393555, |
|
"learning_rate": 0.0004235411980260069, |
|
"loss": 0.0252, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.7835259857962144, |
|
"grad_norm": 0.28869110345840454, |
|
"learning_rate": 0.0004231801617530773, |
|
"loss": 0.035, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.7854277478976615, |
|
"grad_norm": 0.2648963928222656, |
|
"learning_rate": 0.0004228184297544323, |
|
"loss": 0.0579, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.7873295099991086, |
|
"grad_norm": 0.24735620617866516, |
|
"learning_rate": 0.00042245600348327664, |
|
"loss": 0.0321, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.7892312721005557, |
|
"grad_norm": 0.14888009428977966, |
|
"learning_rate": 0.00042209288439560444, |
|
"loss": 0.0347, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.7911330342020028, |
|
"grad_norm": 0.11169561743736267, |
|
"learning_rate": 0.0004217290739501929, |
|
"loss": 0.0409, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.7930347963034499, |
|
"grad_norm": 0.1095479279756546, |
|
"learning_rate": 0.0004213645736085968, |
|
"loss": 0.0323, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 0.7949365584048971, |
|
"grad_norm": 0.21435153484344482, |
|
"learning_rate": 0.00042099938483514235, |
|
"loss": 0.0418, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.7968383205063442, |
|
"grad_norm": 0.1274380087852478, |
|
"learning_rate": 0.0004206335090969215, |
|
"loss": 0.04, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 0.7987400826077913, |
|
"grad_norm": 0.10879164189100266, |
|
"learning_rate": 0.00042026694786378603, |
|
"loss": 0.0443, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.8006418447092384, |
|
"grad_norm": 0.21737106144428253, |
|
"learning_rate": 0.00041989970260834157, |
|
"loss": 0.0333, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 0.8025436068106855, |
|
"grad_norm": 0.3080846071243286, |
|
"learning_rate": 0.00041953177480594163, |
|
"loss": 0.0521, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.8044453689121327, |
|
"grad_norm": 0.23476336896419525, |
|
"learning_rate": 0.0004191631659346818, |
|
"loss": 0.0312, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 0.8063471310135798, |
|
"grad_norm": 0.311959832906723, |
|
"learning_rate": 0.00041879387747539376, |
|
"loss": 0.0351, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.8082488931150269, |
|
"grad_norm": 0.24322810769081116, |
|
"learning_rate": 0.0004184239109116393, |
|
"loss": 0.0426, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.810150655216474, |
|
"grad_norm": 0.2684605121612549, |
|
"learning_rate": 0.00041805326772970455, |
|
"loss": 0.0313, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.8120524173179211, |
|
"grad_norm": 0.2264401763677597, |
|
"learning_rate": 0.0004176819494185936, |
|
"loss": 0.0499, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 0.8139541794193682, |
|
"grad_norm": 0.12556996941566467, |
|
"learning_rate": 0.00041730995747002296, |
|
"loss": 0.0269, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.8158559415208154, |
|
"grad_norm": 0.22863665223121643, |
|
"learning_rate": 0.00041693729337841546, |
|
"loss": 0.0404, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 0.8177577036222625, |
|
"grad_norm": 0.007670534774661064, |
|
"learning_rate": 0.00041656395864089383, |
|
"loss": 0.0318, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.8196594657237096, |
|
"grad_norm": 0.2585577368736267, |
|
"learning_rate": 0.0004161899547572753, |
|
"loss": 0.0293, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 0.8215612278251567, |
|
"grad_norm": 0.36932700872421265, |
|
"learning_rate": 0.00041581528323006526, |
|
"loss": 0.0379, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.8234629899266038, |
|
"grad_norm": 0.2459120750427246, |
|
"learning_rate": 0.0004154399455644512, |
|
"loss": 0.0281, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 0.825364752028051, |
|
"grad_norm": 0.03090517409145832, |
|
"learning_rate": 0.0004150639432682967, |
|
"loss": 0.0401, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.8272665141294981, |
|
"grad_norm": 0.11881538480520248, |
|
"learning_rate": 0.0004146872778521355, |
|
"loss": 0.0376, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.8291682762309452, |
|
"grad_norm": 0.2136705368757248, |
|
"learning_rate": 0.0004143099508291652, |
|
"loss": 0.0247, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 0.8310700383323923, |
|
"grad_norm": 0.22370411455631256, |
|
"learning_rate": 0.00041393196371524143, |
|
"loss": 0.0287, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 0.8329718004338394, |
|
"grad_norm": 0.26691102981567383, |
|
"learning_rate": 0.00041355331802887156, |
|
"loss": 0.0329, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 0.8348735625352866, |
|
"grad_norm": 0.16438519954681396, |
|
"learning_rate": 0.00041317401529120866, |
|
"loss": 0.0281, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 0.8367753246367338, |
|
"grad_norm": 0.1785949021577835, |
|
"learning_rate": 0.0004127940570260456, |
|
"loss": 0.0313, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.8386770867381809, |
|
"grad_norm": 0.11144755035638809, |
|
"learning_rate": 0.00041241344475980823, |
|
"loss": 0.0283, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.840578848839628, |
|
"grad_norm": 0.3532226085662842, |
|
"learning_rate": 0.00041203218002155046, |
|
"loss": 0.027, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 0.8424806109410751, |
|
"grad_norm": 0.15130820870399475, |
|
"learning_rate": 0.0004116502643429469, |
|
"loss": 0.0283, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 0.8443823730425222, |
|
"grad_norm": 0.10683543235063553, |
|
"learning_rate": 0.00041126769925828733, |
|
"loss": 0.0256, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 0.8462841351439694, |
|
"grad_norm": 0.12012962251901627, |
|
"learning_rate": 0.0004108844863044706, |
|
"loss": 0.0244, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.8481858972454165, |
|
"grad_norm": 0.209054633975029, |
|
"learning_rate": 0.00041050062702099795, |
|
"loss": 0.0371, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 0.8500876593468636, |
|
"grad_norm": 0.27558189630508423, |
|
"learning_rate": 0.00041011612294996746, |
|
"loss": 0.0241, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 0.8519894214483107, |
|
"grad_norm": 0.2076425403356552, |
|
"learning_rate": 0.0004097309756360674, |
|
"loss": 0.0376, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.8538911835497578, |
|
"grad_norm": 0.1804133802652359, |
|
"learning_rate": 0.00040934518662657035, |
|
"loss": 0.0412, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 0.8557929456512049, |
|
"grad_norm": 0.30146369338035583, |
|
"learning_rate": 0.0004089587574713264, |
|
"loss": 0.0271, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.8576947077526521, |
|
"grad_norm": 0.24618133902549744, |
|
"learning_rate": 0.00040857168972275785, |
|
"loss": 0.0348, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 0.8595964698540992, |
|
"grad_norm": 0.1946696937084198, |
|
"learning_rate": 0.00040818398493585185, |
|
"loss": 0.0217, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 0.8614982319555463, |
|
"grad_norm": 0.14857546985149384, |
|
"learning_rate": 0.0004077956446681554, |
|
"loss": 0.0429, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 0.8633999940569934, |
|
"grad_norm": 0.1880268156528473, |
|
"learning_rate": 0.0004074066704797682, |
|
"loss": 0.0476, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 0.8653017561584405, |
|
"grad_norm": 0.13485951721668243, |
|
"learning_rate": 0.00040701706393333635, |
|
"loss": 0.0369, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.8672035182598877, |
|
"grad_norm": 0.022607989609241486, |
|
"learning_rate": 0.00040662682659404684, |
|
"loss": 0.017, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 0.8691052803613348, |
|
"grad_norm": 0.4116274416446686, |
|
"learning_rate": 0.00040623596002962027, |
|
"loss": 0.0333, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 0.8710070424627819, |
|
"grad_norm": 0.1159391701221466, |
|
"learning_rate": 0.0004058444658103055, |
|
"loss": 0.0206, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 0.872908804564229, |
|
"grad_norm": 0.22013840079307556, |
|
"learning_rate": 0.00040545234550887264, |
|
"loss": 0.0394, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 0.8748105666656761, |
|
"grad_norm": 0.1728079915046692, |
|
"learning_rate": 0.0004050596007006072, |
|
"loss": 0.0435, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.8767123287671232, |
|
"grad_norm": 0.14915741980075836, |
|
"learning_rate": 0.0004046662329633032, |
|
"loss": 0.0402, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 0.8786140908685705, |
|
"grad_norm": 0.20652537047863007, |
|
"learning_rate": 0.0004042722438772576, |
|
"loss": 0.037, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 0.8805158529700176, |
|
"grad_norm": 0.2016768902540207, |
|
"learning_rate": 0.00040387763502526325, |
|
"loss": 0.0247, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 0.8824176150714647, |
|
"grad_norm": 0.07532606273889542, |
|
"learning_rate": 0.00040348240799260296, |
|
"loss": 0.0302, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 0.8843193771729118, |
|
"grad_norm": 0.16426606476306915, |
|
"learning_rate": 0.00040308656436704294, |
|
"loss": 0.0219, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.8862211392743589, |
|
"grad_norm": 0.2340356707572937, |
|
"learning_rate": 0.0004026901057388265, |
|
"loss": 0.0347, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 0.8881229013758061, |
|
"grad_norm": 0.019909365102648735, |
|
"learning_rate": 0.0004022930337006676, |
|
"loss": 0.0411, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 0.8900246634772532, |
|
"grad_norm": 0.17835919559001923, |
|
"learning_rate": 0.0004018953498477444, |
|
"loss": 0.0393, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 0.8919264255787003, |
|
"grad_norm": 0.13876459002494812, |
|
"learning_rate": 0.00040149705577769313, |
|
"loss": 0.0401, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 0.8938281876801474, |
|
"grad_norm": 0.16416268050670624, |
|
"learning_rate": 0.00040109815309060135, |
|
"loss": 0.0396, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.8957299497815945, |
|
"grad_norm": 0.09078884869813919, |
|
"learning_rate": 0.0004006986433890017, |
|
"loss": 0.039, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 0.8976317118830416, |
|
"grad_norm": 0.08101935684680939, |
|
"learning_rate": 0.00040029852827786535, |
|
"loss": 0.0269, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 0.8995334739844888, |
|
"grad_norm": 0.12669484317302704, |
|
"learning_rate": 0.00039989780936459566, |
|
"loss": 0.0392, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 0.9014352360859359, |
|
"grad_norm": 0.11005831509828568, |
|
"learning_rate": 0.00039949648825902165, |
|
"loss": 0.0378, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 0.903336998187383, |
|
"grad_norm": 0.21079300343990326, |
|
"learning_rate": 0.0003990945665733916, |
|
"loss": 0.0334, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.9052387602888301, |
|
"grad_norm": 0.07260707765817642, |
|
"learning_rate": 0.0003986920459223665, |
|
"loss": 0.0287, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 0.9071405223902772, |
|
"grad_norm": 0.01923411153256893, |
|
"learning_rate": 0.0003982889279230135, |
|
"loss": 0.0454, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 0.9090422844917244, |
|
"grad_norm": 0.13614831864833832, |
|
"learning_rate": 0.0003978852141947998, |
|
"loss": 0.0407, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 0.9109440465931715, |
|
"grad_norm": 0.1618584245443344, |
|
"learning_rate": 0.00039748090635958555, |
|
"loss": 0.0389, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 0.9128458086946186, |
|
"grad_norm": 0.16158536076545715, |
|
"learning_rate": 0.00039707600604161773, |
|
"loss": 0.0343, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.9147475707960657, |
|
"grad_norm": 0.1736435741186142, |
|
"learning_rate": 0.00039667051486752357, |
|
"loss": 0.0204, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 0.9166493328975128, |
|
"grad_norm": 0.15715183317661285, |
|
"learning_rate": 0.000396264434466304, |
|
"loss": 0.0423, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 0.9185510949989599, |
|
"grad_norm": 0.1514957845211029, |
|
"learning_rate": 0.00039585776646932703, |
|
"loss": 0.0329, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 0.9204528571004071, |
|
"grad_norm": 0.0464215911924839, |
|
"learning_rate": 0.0003954505125103212, |
|
"loss": 0.0281, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 0.9223546192018542, |
|
"grad_norm": 0.24331432580947876, |
|
"learning_rate": 0.0003950426742253692, |
|
"loss": 0.0385, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.9242563813033013, |
|
"grad_norm": 0.006427076645195484, |
|
"learning_rate": 0.00039463425325290095, |
|
"loss": 0.0312, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 0.9261581434047484, |
|
"grad_norm": 0.3959774971008301, |
|
"learning_rate": 0.00039422525123368755, |
|
"loss": 0.0168, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 0.9280599055061955, |
|
"grad_norm": 0.14587469398975372, |
|
"learning_rate": 0.0003938156698108342, |
|
"loss": 0.0319, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 0.9299616676076428, |
|
"grad_norm": 0.1308862864971161, |
|
"learning_rate": 0.0003934055106297735, |
|
"loss": 0.0247, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 0.9318634297090899, |
|
"grad_norm": 0.03495126590132713, |
|
"learning_rate": 0.0003929947753382596, |
|
"loss": 0.0199, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.933765191810537, |
|
"grad_norm": 0.1539415717124939, |
|
"learning_rate": 0.0003925834655863608, |
|
"loss": 0.0375, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 0.9356669539119841, |
|
"grad_norm": 0.1579003781080246, |
|
"learning_rate": 0.00039217158302645326, |
|
"loss": 0.0295, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 0.9375687160134312, |
|
"grad_norm": 0.007031635381281376, |
|
"learning_rate": 0.00039175912931321426, |
|
"loss": 0.0258, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 0.9394704781148783, |
|
"grad_norm": 0.3409572243690491, |
|
"learning_rate": 0.00039134610610361574, |
|
"loss": 0.0351, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 0.9413722402163255, |
|
"grad_norm": 0.27617499232292175, |
|
"learning_rate": 0.00039093251505691745, |
|
"loss": 0.0224, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.9432740023177726, |
|
"grad_norm": 0.18574129045009613, |
|
"learning_rate": 0.00039051835783466025, |
|
"loss": 0.0258, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 0.9451757644192197, |
|
"grad_norm": 0.006668297573924065, |
|
"learning_rate": 0.0003901036361006596, |
|
"loss": 0.0241, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 0.9470775265206668, |
|
"grad_norm": 0.14266005158424377, |
|
"learning_rate": 0.00038968835152099874, |
|
"loss": 0.0262, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 0.9489792886221139, |
|
"grad_norm": 0.18967001140117645, |
|
"learning_rate": 0.00038927250576402227, |
|
"loss": 0.0233, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 0.950881050723561, |
|
"grad_norm": 0.1983349472284317, |
|
"learning_rate": 0.00038885610050032896, |
|
"loss": 0.0364, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.9527828128250082, |
|
"grad_norm": 0.1807214319705963, |
|
"learning_rate": 0.00038843913740276546, |
|
"loss": 0.0345, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 0.9546845749264553, |
|
"grad_norm": 0.336378812789917, |
|
"learning_rate": 0.0003880216181464195, |
|
"loss": 0.021, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 0.9565863370279024, |
|
"grad_norm": 0.2094445377588272, |
|
"learning_rate": 0.0003876035444086129, |
|
"loss": 0.0453, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 0.9584880991293495, |
|
"grad_norm": 0.1133272647857666, |
|
"learning_rate": 0.0003871849178688952, |
|
"loss": 0.0339, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 0.9603898612307966, |
|
"grad_norm": 0.21298140287399292, |
|
"learning_rate": 0.00038676574020903666, |
|
"loss": 0.0417, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 0.9622916233322438, |
|
"grad_norm": 0.2184475064277649, |
|
"learning_rate": 0.00038634601311302166, |
|
"loss": 0.02, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 0.9641933854336909, |
|
"grad_norm": 0.13946540653705597, |
|
"learning_rate": 0.0003859257382670417, |
|
"loss": 0.0258, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 0.966095147535138, |
|
"grad_norm": 0.09975899010896683, |
|
"learning_rate": 0.000385504917359489, |
|
"loss": 0.0261, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 0.9679969096365851, |
|
"grad_norm": 0.10357426851987839, |
|
"learning_rate": 0.00038508355208094925, |
|
"loss": 0.0367, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 0.9698986717380322, |
|
"grad_norm": 0.2898187041282654, |
|
"learning_rate": 0.0003846616441241953, |
|
"loss": 0.0398, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.9718004338394793, |
|
"grad_norm": 0.21133796870708466, |
|
"learning_rate": 0.00038423919518418, |
|
"loss": 0.0311, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 0.9737021959409266, |
|
"grad_norm": 0.20074529945850372, |
|
"learning_rate": 0.0003838162069580296, |
|
"loss": 0.0319, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.9756039580423737, |
|
"grad_norm": 0.10800763219594955, |
|
"learning_rate": 0.0003833926811450368, |
|
"loss": 0.0331, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 0.9775057201438208, |
|
"grad_norm": 0.17509090900421143, |
|
"learning_rate": 0.0003829686194466539, |
|
"loss": 0.0176, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 0.9794074822452679, |
|
"grad_norm": 0.16533738374710083, |
|
"learning_rate": 0.0003825440235664863, |
|
"loss": 0.0182, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 0.981309244346715, |
|
"grad_norm": 0.15933182835578918, |
|
"learning_rate": 0.000382118895210285, |
|
"loss": 0.0398, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 0.9832110064481622, |
|
"grad_norm": 0.2111063301563263, |
|
"learning_rate": 0.0003816932360859405, |
|
"loss": 0.0247, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 0.9851127685496093, |
|
"grad_norm": 0.18295446038246155, |
|
"learning_rate": 0.0003812670479034754, |
|
"loss": 0.041, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 0.9870145306510564, |
|
"grad_norm": 0.17066188156604767, |
|
"learning_rate": 0.0003808403323750379, |
|
"loss": 0.032, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 0.9889162927525035, |
|
"grad_norm": 0.17090731859207153, |
|
"learning_rate": 0.00038041309121489443, |
|
"loss": 0.0313, |
|
"step": 5200 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 15774, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 200, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.3194238878926234e+17, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |