|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9998204829009962, |
|
"eval_steps": 500, |
|
"global_step": 5570, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.003590341980073602, |
|
"grad_norm": 6.1201090812683105, |
|
"learning_rate": 1.7953321364452425e-06, |
|
"loss": 2.9926, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.007180683960147204, |
|
"grad_norm": 4.0763983726501465, |
|
"learning_rate": 3.590664272890485e-06, |
|
"loss": 2.7866, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.010771025940220806, |
|
"grad_norm": 3.385845184326172, |
|
"learning_rate": 5.385996409335727e-06, |
|
"loss": 2.0378, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.014361367920294408, |
|
"grad_norm": 5.195909023284912, |
|
"learning_rate": 7.18132854578097e-06, |
|
"loss": 1.2251, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01795170990036801, |
|
"grad_norm": 2.219606637954712, |
|
"learning_rate": 8.976660682226211e-06, |
|
"loss": 0.6834, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.02154205188044161, |
|
"grad_norm": 16.839906692504883, |
|
"learning_rate": 1.0771992818671454e-05, |
|
"loss": 0.4754, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.025132393860515214, |
|
"grad_norm": 25.55668067932129, |
|
"learning_rate": 1.2567324955116697e-05, |
|
"loss": 0.3818, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.028722735840588817, |
|
"grad_norm": 1.359479546546936, |
|
"learning_rate": 1.436265709156194e-05, |
|
"loss": 0.3797, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.032313077820662416, |
|
"grad_norm": 1.2728756666183472, |
|
"learning_rate": 1.615798922800718e-05, |
|
"loss": 0.3712, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.03590341980073602, |
|
"grad_norm": 1.9393813610076904, |
|
"learning_rate": 1.7953321364452423e-05, |
|
"loss": 0.3564, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03949376178080962, |
|
"grad_norm": 1.4643720388412476, |
|
"learning_rate": 1.9748653500897668e-05, |
|
"loss": 0.3438, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.04308410376088322, |
|
"grad_norm": 1.4880571365356445, |
|
"learning_rate": 2.154398563734291e-05, |
|
"loss": 0.321, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.046674445740956826, |
|
"grad_norm": 1.239957571029663, |
|
"learning_rate": 2.3339317773788153e-05, |
|
"loss": 0.319, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.05026478772103043, |
|
"grad_norm": 1.2627112865447998, |
|
"learning_rate": 2.5134649910233395e-05, |
|
"loss": 0.3128, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.05385512970110403, |
|
"grad_norm": 1.1520243883132935, |
|
"learning_rate": 2.6929982046678636e-05, |
|
"loss": 0.31, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.05744547168117763, |
|
"grad_norm": 1.8554497957229614, |
|
"learning_rate": 2.872531418312388e-05, |
|
"loss": 0.3167, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.061035813661251236, |
|
"grad_norm": 1.8501205444335938, |
|
"learning_rate": 3.0520646319569125e-05, |
|
"loss": 0.3177, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.06462615564132483, |
|
"grad_norm": 1.249617099761963, |
|
"learning_rate": 3.231597845601436e-05, |
|
"loss": 0.3081, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.06821649762139843, |
|
"grad_norm": 1.1702481508255005, |
|
"learning_rate": 3.411131059245961e-05, |
|
"loss": 0.3122, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.07180683960147204, |
|
"grad_norm": 1.0217711925506592, |
|
"learning_rate": 3.5906642728904846e-05, |
|
"loss": 0.3047, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.07539718158154564, |
|
"grad_norm": 0.8885968923568726, |
|
"learning_rate": 3.770197486535009e-05, |
|
"loss": 0.3042, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.07898752356161924, |
|
"grad_norm": 1.0739161968231201, |
|
"learning_rate": 3.9497307001795335e-05, |
|
"loss": 0.2957, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.08257786554169284, |
|
"grad_norm": 1.963072419166565, |
|
"learning_rate": 4.129263913824057e-05, |
|
"loss": 0.2967, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.08616820752176645, |
|
"grad_norm": 0.9546407461166382, |
|
"learning_rate": 4.308797127468582e-05, |
|
"loss": 0.2834, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.08975854950184005, |
|
"grad_norm": 1.82941734790802, |
|
"learning_rate": 4.488330341113106e-05, |
|
"loss": 0.2864, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.09334889148191365, |
|
"grad_norm": 1.4494279623031616, |
|
"learning_rate": 4.667863554757631e-05, |
|
"loss": 0.2891, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.09693923346198725, |
|
"grad_norm": 1.195784330368042, |
|
"learning_rate": 4.847396768402155e-05, |
|
"loss": 0.2904, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.10052957544206086, |
|
"grad_norm": 1.0053528547286987, |
|
"learning_rate": 5.026929982046679e-05, |
|
"loss": 0.2804, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.10411991742213446, |
|
"grad_norm": 4.148128986358643, |
|
"learning_rate": 5.2064631956912034e-05, |
|
"loss": 0.3165, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.10771025940220806, |
|
"grad_norm": 1.4303346872329712, |
|
"learning_rate": 5.385996409335727e-05, |
|
"loss": 0.2747, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.11130060138228166, |
|
"grad_norm": 1.128341794013977, |
|
"learning_rate": 5.565529622980251e-05, |
|
"loss": 0.297, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.11489094336235527, |
|
"grad_norm": 1.2280890941619873, |
|
"learning_rate": 5.745062836624776e-05, |
|
"loss": 0.2821, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.11848128534242887, |
|
"grad_norm": 1.4685401916503906, |
|
"learning_rate": 5.9245960502693e-05, |
|
"loss": 0.2815, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.12207162732250247, |
|
"grad_norm": 2.4324777126312256, |
|
"learning_rate": 6.104129263913825e-05, |
|
"loss": 0.291, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.12566196930257606, |
|
"grad_norm": 1.2875359058380127, |
|
"learning_rate": 6.283662477558349e-05, |
|
"loss": 0.2852, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.12925231128264966, |
|
"grad_norm": 2.257322072982788, |
|
"learning_rate": 6.463195691202873e-05, |
|
"loss": 0.2804, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.13284265326272326, |
|
"grad_norm": 1.3770567178726196, |
|
"learning_rate": 6.642728904847398e-05, |
|
"loss": 0.2873, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.13643299524279687, |
|
"grad_norm": 1.6921864748001099, |
|
"learning_rate": 6.822262118491922e-05, |
|
"loss": 0.2974, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.14002333722287047, |
|
"grad_norm": 0.9520618915557861, |
|
"learning_rate": 7.001795332136445e-05, |
|
"loss": 0.2939, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.14361367920294407, |
|
"grad_norm": 0.812728762626648, |
|
"learning_rate": 7.181328545780969e-05, |
|
"loss": 0.2702, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.14720402118301767, |
|
"grad_norm": 1.7924541234970093, |
|
"learning_rate": 7.360861759425493e-05, |
|
"loss": 0.2969, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.15079436316309128, |
|
"grad_norm": 2.439558982849121, |
|
"learning_rate": 7.540394973070018e-05, |
|
"loss": 0.2893, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.15438470514316488, |
|
"grad_norm": 0.8057828545570374, |
|
"learning_rate": 7.719928186714542e-05, |
|
"loss": 0.2808, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.15797504712323848, |
|
"grad_norm": 1.2622177600860596, |
|
"learning_rate": 7.899461400359067e-05, |
|
"loss": 0.282, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.16156538910331208, |
|
"grad_norm": 1.1095036268234253, |
|
"learning_rate": 8.078994614003591e-05, |
|
"loss": 0.2691, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.1651557310833857, |
|
"grad_norm": 0.7493880987167358, |
|
"learning_rate": 8.258527827648115e-05, |
|
"loss": 0.2748, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.1687460730634593, |
|
"grad_norm": 0.7199195623397827, |
|
"learning_rate": 8.43806104129264e-05, |
|
"loss": 0.2876, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.1723364150435329, |
|
"grad_norm": 0.9257749915122986, |
|
"learning_rate": 8.617594254937164e-05, |
|
"loss": 0.2801, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.1759267570236065, |
|
"grad_norm": 0.9219655394554138, |
|
"learning_rate": 8.797127468581689e-05, |
|
"loss": 0.2717, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.1795170990036801, |
|
"grad_norm": 1.5916101932525635, |
|
"learning_rate": 8.976660682226212e-05, |
|
"loss": 0.275, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.1831074409837537, |
|
"grad_norm": 1.1832544803619385, |
|
"learning_rate": 9.156193895870736e-05, |
|
"loss": 0.2774, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.1866977829638273, |
|
"grad_norm": 0.8959478735923767, |
|
"learning_rate": 9.335727109515261e-05, |
|
"loss": 0.3003, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.1902881249439009, |
|
"grad_norm": 0.7720569968223572, |
|
"learning_rate": 9.515260323159785e-05, |
|
"loss": 0.2771, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.1938784669239745, |
|
"grad_norm": 1.263458013534546, |
|
"learning_rate": 9.69479353680431e-05, |
|
"loss": 0.2737, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.1974688089040481, |
|
"grad_norm": 1.6316909790039062, |
|
"learning_rate": 9.874326750448834e-05, |
|
"loss": 0.2899, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.2010591508841217, |
|
"grad_norm": 0.8948745131492615, |
|
"learning_rate": 9.999991163368873e-05, |
|
"loss": 0.2703, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.20464949286419531, |
|
"grad_norm": 1.680094599723816, |
|
"learning_rate": 9.999834068573299e-05, |
|
"loss": 0.2828, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.20823983484426892, |
|
"grad_norm": 1.1262023448944092, |
|
"learning_rate": 9.999480611298721e-05, |
|
"loss": 0.2651, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.21183017682434252, |
|
"grad_norm": 1.2514327764511108, |
|
"learning_rate": 9.998930805426751e-05, |
|
"loss": 0.2828, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.21542051880441612, |
|
"grad_norm": 0.8650713562965393, |
|
"learning_rate": 9.998184672550354e-05, |
|
"loss": 0.2641, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.21901086078448972, |
|
"grad_norm": 1.3188605308532715, |
|
"learning_rate": 9.997242241973004e-05, |
|
"loss": 0.2791, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.22260120276456333, |
|
"grad_norm": 1.677878737449646, |
|
"learning_rate": 9.996103550707527e-05, |
|
"loss": 0.2803, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.22619154474463693, |
|
"grad_norm": 1.9317690134048462, |
|
"learning_rate": 9.994768643474658e-05, |
|
"loss": 0.263, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.22978188672471053, |
|
"grad_norm": 0.8656140565872192, |
|
"learning_rate": 9.993237572701274e-05, |
|
"loss": 0.2723, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.23337222870478413, |
|
"grad_norm": 0.7631008625030518, |
|
"learning_rate": 9.991510398518341e-05, |
|
"loss": 0.2958, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.23696257068485774, |
|
"grad_norm": 0.6852580308914185, |
|
"learning_rate": 9.989587188758552e-05, |
|
"loss": 0.2612, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.24055291266493134, |
|
"grad_norm": 0.6097802519798279, |
|
"learning_rate": 9.987468018953661e-05, |
|
"loss": 0.2607, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.24414325464500494, |
|
"grad_norm": 1.254186987876892, |
|
"learning_rate": 9.985152972331516e-05, |
|
"loss": 0.2662, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.24773359662507854, |
|
"grad_norm": 0.8868479132652283, |
|
"learning_rate": 9.982642139812793e-05, |
|
"loss": 0.2705, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.2513239386051521, |
|
"grad_norm": 1.5867512226104736, |
|
"learning_rate": 9.979935620007424e-05, |
|
"loss": 0.2735, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.25491428058522575, |
|
"grad_norm": 0.7384280562400818, |
|
"learning_rate": 9.977033519210725e-05, |
|
"loss": 0.2676, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.2585046225652993, |
|
"grad_norm": 0.7617084383964539, |
|
"learning_rate": 9.97393595139922e-05, |
|
"loss": 0.2655, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.26209496454537295, |
|
"grad_norm": 0.6475211381912231, |
|
"learning_rate": 9.970643038226166e-05, |
|
"loss": 0.2629, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.26568530652544653, |
|
"grad_norm": 1.3059916496276855, |
|
"learning_rate": 9.967154909016772e-05, |
|
"loss": 0.2548, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.26927564850552016, |
|
"grad_norm": 1.1138116121292114, |
|
"learning_rate": 9.963471700763123e-05, |
|
"loss": 0.2525, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.27286599048559373, |
|
"grad_norm": 1.0550082921981812, |
|
"learning_rate": 9.959593558118803e-05, |
|
"loss": 0.2622, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.27645633246566736, |
|
"grad_norm": 0.8017902374267578, |
|
"learning_rate": 9.955520633393205e-05, |
|
"loss": 0.2649, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.28004667444574094, |
|
"grad_norm": 1.235143780708313, |
|
"learning_rate": 9.951253086545558e-05, |
|
"loss": 0.2747, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.28363701642581457, |
|
"grad_norm": 0.7427018284797668, |
|
"learning_rate": 9.946791085178639e-05, |
|
"loss": 0.242, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.28722735840588814, |
|
"grad_norm": 0.6972371935844421, |
|
"learning_rate": 9.942134804532193e-05, |
|
"loss": 0.2423, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.2908177003859618, |
|
"grad_norm": 0.9071277976036072, |
|
"learning_rate": 9.937284427476052e-05, |
|
"loss": 0.2425, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.29440804236603535, |
|
"grad_norm": 0.8345310688018799, |
|
"learning_rate": 9.932240144502952e-05, |
|
"loss": 0.2864, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.297998384346109, |
|
"grad_norm": 1.1392581462860107, |
|
"learning_rate": 9.927002153721044e-05, |
|
"loss": 0.2366, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.30158872632618255, |
|
"grad_norm": 0.9356684684753418, |
|
"learning_rate": 9.921570660846131e-05, |
|
"loss": 0.2464, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.3051790683062562, |
|
"grad_norm": 1.5248229503631592, |
|
"learning_rate": 9.915945879193571e-05, |
|
"loss": 0.2809, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.30876941028632976, |
|
"grad_norm": 1.0663933753967285, |
|
"learning_rate": 9.91012802966991e-05, |
|
"loss": 0.2779, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.3123597522664034, |
|
"grad_norm": 0.9292562007904053, |
|
"learning_rate": 9.904117340764201e-05, |
|
"loss": 0.2465, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.31595009424647696, |
|
"grad_norm": 0.7365911602973938, |
|
"learning_rate": 9.897914048539032e-05, |
|
"loss": 0.2688, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.3195404362265506, |
|
"grad_norm": 1.0190156698226929, |
|
"learning_rate": 9.891518396621258e-05, |
|
"loss": 0.2471, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.32313077820662417, |
|
"grad_norm": 1.167611837387085, |
|
"learning_rate": 9.884930636192426e-05, |
|
"loss": 0.2468, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.3267211201866978, |
|
"grad_norm": 1.1509454250335693, |
|
"learning_rate": 9.878151025978918e-05, |
|
"loss": 0.2528, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.3303114621667714, |
|
"grad_norm": 1.0654162168502808, |
|
"learning_rate": 9.871179832241781e-05, |
|
"loss": 0.2669, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.333901804146845, |
|
"grad_norm": 0.9040902853012085, |
|
"learning_rate": 9.86401732876628e-05, |
|
"loss": 0.2513, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.3374921461269186, |
|
"grad_norm": 2.8603482246398926, |
|
"learning_rate": 9.856663796851137e-05, |
|
"loss": 0.2526, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.3410824881069922, |
|
"grad_norm": 0.7283102869987488, |
|
"learning_rate": 9.849119525297488e-05, |
|
"loss": 0.2565, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.3446728300870658, |
|
"grad_norm": 1.1231544017791748, |
|
"learning_rate": 9.841384810397538e-05, |
|
"loss": 0.2591, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.3482631720671394, |
|
"grad_norm": 1.3341351747512817, |
|
"learning_rate": 9.833459955922926e-05, |
|
"loss": 0.2426, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.351853514047213, |
|
"grad_norm": 0.7382979393005371, |
|
"learning_rate": 9.825345273112796e-05, |
|
"loss": 0.2404, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.3554438560272866, |
|
"grad_norm": 0.9196600914001465, |
|
"learning_rate": 9.817041080661571e-05, |
|
"loss": 0.269, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.3590341980073602, |
|
"grad_norm": 4.254228115081787, |
|
"learning_rate": 9.808547704706437e-05, |
|
"loss": 0.2498, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.3626245399874338, |
|
"grad_norm": 0.6999326348304749, |
|
"learning_rate": 9.799865478814535e-05, |
|
"loss": 0.242, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.3662148819675074, |
|
"grad_norm": 1.5552287101745605, |
|
"learning_rate": 9.790994743969864e-05, |
|
"loss": 0.2663, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.36980522394758103, |
|
"grad_norm": 0.6971444487571716, |
|
"learning_rate": 9.781935848559878e-05, |
|
"loss": 0.2549, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.3733955659276546, |
|
"grad_norm": 1.180908441543579, |
|
"learning_rate": 9.772689148361817e-05, |
|
"loss": 0.2313, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.37698590790772823, |
|
"grad_norm": 0.633343517780304, |
|
"learning_rate": 9.763255006528731e-05, |
|
"loss": 0.2395, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.3805762498878018, |
|
"grad_norm": 0.9181081056594849, |
|
"learning_rate": 9.753633793575206e-05, |
|
"loss": 0.2512, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.38416659186787544, |
|
"grad_norm": 1.1254559755325317, |
|
"learning_rate": 9.743825887362832e-05, |
|
"loss": 0.2467, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.387756933847949, |
|
"grad_norm": 0.8145197629928589, |
|
"learning_rate": 9.733831673085344e-05, |
|
"loss": 0.2421, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.39134727582802264, |
|
"grad_norm": 0.5483050346374512, |
|
"learning_rate": 9.723651543253509e-05, |
|
"loss": 0.2578, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.3949376178080962, |
|
"grad_norm": 0.7891978621482849, |
|
"learning_rate": 9.713285897679699e-05, |
|
"loss": 0.2339, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.39852795978816985, |
|
"grad_norm": 0.6310613751411438, |
|
"learning_rate": 9.702735143462198e-05, |
|
"loss": 0.2379, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.4021183017682434, |
|
"grad_norm": 0.8631925582885742, |
|
"learning_rate": 9.691999694969208e-05, |
|
"loss": 0.2413, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.40570864374831705, |
|
"grad_norm": 0.7224175930023193, |
|
"learning_rate": 9.681079973822576e-05, |
|
"loss": 0.2343, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.40929898572839063, |
|
"grad_norm": 0.8189213871955872, |
|
"learning_rate": 9.669976408881238e-05, |
|
"loss": 0.2513, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.4128893277084642, |
|
"grad_norm": 0.8129417300224304, |
|
"learning_rate": 9.658689436224373e-05, |
|
"loss": 0.2547, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.41647966968853783, |
|
"grad_norm": 1.1440197229385376, |
|
"learning_rate": 9.647219499134277e-05, |
|
"loss": 0.2427, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.4200700116686114, |
|
"grad_norm": 0.9682267308235168, |
|
"learning_rate": 9.635567048078958e-05, |
|
"loss": 0.2411, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.42366035364868504, |
|
"grad_norm": 0.7513495683670044, |
|
"learning_rate": 9.623732540694437e-05, |
|
"loss": 0.252, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.4272506956287586, |
|
"grad_norm": 3.1498029232025146, |
|
"learning_rate": 9.61171644176678e-05, |
|
"loss": 0.2486, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.43084103760883224, |
|
"grad_norm": 0.6250784397125244, |
|
"learning_rate": 9.599519223213842e-05, |
|
"loss": 0.2459, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.4344313795889058, |
|
"grad_norm": 0.548052966594696, |
|
"learning_rate": 9.587141364066736e-05, |
|
"loss": 0.2334, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.43802172156897945, |
|
"grad_norm": 0.6549167037010193, |
|
"learning_rate": 9.574583350451016e-05, |
|
"loss": 0.2399, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.441612063549053, |
|
"grad_norm": 0.7177796363830566, |
|
"learning_rate": 9.561845675567586e-05, |
|
"loss": 0.2574, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.44520240552912665, |
|
"grad_norm": 1.0265281200408936, |
|
"learning_rate": 9.548928839673334e-05, |
|
"loss": 0.2285, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.44879274750920023, |
|
"grad_norm": 1.3221251964569092, |
|
"learning_rate": 9.535833350061473e-05, |
|
"loss": 0.2293, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.45238308948927386, |
|
"grad_norm": 0.9542430639266968, |
|
"learning_rate": 9.522559721041636e-05, |
|
"loss": 0.2367, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.45597343146934743, |
|
"grad_norm": 2.0089797973632812, |
|
"learning_rate": 9.509108473919662e-05, |
|
"loss": 0.2166, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.45956377344942106, |
|
"grad_norm": 1.2323672771453857, |
|
"learning_rate": 9.495480136977127e-05, |
|
"loss": 0.2253, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.46315411542949464, |
|
"grad_norm": 1.155745506286621, |
|
"learning_rate": 9.4816752454506e-05, |
|
"loss": 0.2236, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.46674445740956827, |
|
"grad_norm": 0.5866098403930664, |
|
"learning_rate": 9.46769434151062e-05, |
|
"loss": 0.2346, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.47033479938964184, |
|
"grad_norm": 0.8677975535392761, |
|
"learning_rate": 9.4535379742404e-05, |
|
"loss": 0.2229, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.4739251413697155, |
|
"grad_norm": 0.8805405497550964, |
|
"learning_rate": 9.439206699614263e-05, |
|
"loss": 0.2279, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.47751548334978905, |
|
"grad_norm": 0.5903385877609253, |
|
"learning_rate": 9.424701080475811e-05, |
|
"loss": 0.2454, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.4811058253298627, |
|
"grad_norm": 0.9364457726478577, |
|
"learning_rate": 9.410021686515815e-05, |
|
"loss": 0.2454, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.48469616730993625, |
|
"grad_norm": 1.4409586191177368, |
|
"learning_rate": 9.39516909424985e-05, |
|
"loss": 0.2417, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.4882865092900099, |
|
"grad_norm": 0.705747663974762, |
|
"learning_rate": 9.380143886995636e-05, |
|
"loss": 0.2253, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.49187685127008346, |
|
"grad_norm": 1.2557168006896973, |
|
"learning_rate": 9.364946654850148e-05, |
|
"loss": 0.2332, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.4954671932501571, |
|
"grad_norm": 1.4732472896575928, |
|
"learning_rate": 9.349577994666427e-05, |
|
"loss": 0.2202, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.49905753523023066, |
|
"grad_norm": 1.1212490797042847, |
|
"learning_rate": 9.33403851003015e-05, |
|
"loss": 0.2064, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.5026478772103042, |
|
"grad_norm": 0.825175404548645, |
|
"learning_rate": 9.31832881123591e-05, |
|
"loss": 0.2148, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.5062382191903779, |
|
"grad_norm": 0.8229523301124573, |
|
"learning_rate": 9.302449515263268e-05, |
|
"loss": 0.2307, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.5098285611704515, |
|
"grad_norm": 0.8145741820335388, |
|
"learning_rate": 9.286401245752501e-05, |
|
"loss": 0.2405, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.5134189031505251, |
|
"grad_norm": 0.7511823177337646, |
|
"learning_rate": 9.270184632980121e-05, |
|
"loss": 0.2311, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.5170092451305986, |
|
"grad_norm": 0.7575204968452454, |
|
"learning_rate": 9.253800313834127e-05, |
|
"loss": 0.2068, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.5205995871106723, |
|
"grad_norm": 0.6711773872375488, |
|
"learning_rate": 9.237248931788972e-05, |
|
"loss": 0.2336, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.5241899290907459, |
|
"grad_norm": 0.7057952880859375, |
|
"learning_rate": 9.220531136880314e-05, |
|
"loss": 0.2332, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.5277802710708195, |
|
"grad_norm": 0.7404478788375854, |
|
"learning_rate": 9.203647585679471e-05, |
|
"loss": 0.2204, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.5313706130508931, |
|
"grad_norm": 0.6271808743476868, |
|
"learning_rate": 9.186598941267642e-05, |
|
"loss": 0.207, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.5349609550309667, |
|
"grad_norm": 0.7089178562164307, |
|
"learning_rate": 9.169385873209863e-05, |
|
"loss": 0.2259, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.5385512970110403, |
|
"grad_norm": 0.949642539024353, |
|
"learning_rate": 9.152009057528714e-05, |
|
"loss": 0.229, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.5421416389911139, |
|
"grad_norm": 0.7554659247398376, |
|
"learning_rate": 9.134469176677762e-05, |
|
"loss": 0.2208, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.5457319809711875, |
|
"grad_norm": 0.713874340057373, |
|
"learning_rate": 9.116766919514765e-05, |
|
"loss": 0.2177, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.5493223229512612, |
|
"grad_norm": 0.6753556728363037, |
|
"learning_rate": 9.098902981274615e-05, |
|
"loss": 0.2202, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.5529126649313347, |
|
"grad_norm": 1.2491189241409302, |
|
"learning_rate": 9.080878063542035e-05, |
|
"loss": 0.2118, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.5565030069114083, |
|
"grad_norm": 0.6264563798904419, |
|
"learning_rate": 9.062692874224024e-05, |
|
"loss": 0.2211, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.5600933488914819, |
|
"grad_norm": 0.4661034941673279, |
|
"learning_rate": 9.044348127522054e-05, |
|
"loss": 0.2168, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.5636836908715556, |
|
"grad_norm": 0.6062325835227966, |
|
"learning_rate": 9.025844543904022e-05, |
|
"loss": 0.214, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.5672740328516291, |
|
"grad_norm": 0.6374778747558594, |
|
"learning_rate": 9.007182850075956e-05, |
|
"loss": 0.2083, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.5708643748317027, |
|
"grad_norm": 1.131443738937378, |
|
"learning_rate": 8.98836377895347e-05, |
|
"loss": 0.2005, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.5744547168117763, |
|
"grad_norm": 0.6167281866073608, |
|
"learning_rate": 8.969388069632987e-05, |
|
"loss": 0.2122, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.57804505879185, |
|
"grad_norm": 0.9362030625343323, |
|
"learning_rate": 8.950256467362699e-05, |
|
"loss": 0.2275, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.5816354007719235, |
|
"grad_norm": 0.9304684996604919, |
|
"learning_rate": 8.930969723513312e-05, |
|
"loss": 0.2027, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.5852257427519971, |
|
"grad_norm": 0.62895268201828, |
|
"learning_rate": 8.911528595548533e-05, |
|
"loss": 0.2266, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.5888160847320707, |
|
"grad_norm": 1.480999231338501, |
|
"learning_rate": 8.891933846995312e-05, |
|
"loss": 0.2052, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.5924064267121444, |
|
"grad_norm": 1.3081512451171875, |
|
"learning_rate": 8.872186247413874e-05, |
|
"loss": 0.212, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.595996768692218, |
|
"grad_norm": 2.765312671661377, |
|
"learning_rate": 8.852286572367476e-05, |
|
"loss": 0.2233, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.5995871106722915, |
|
"grad_norm": 1.2033319473266602, |
|
"learning_rate": 8.832235603391958e-05, |
|
"loss": 0.2199, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.6031774526523651, |
|
"grad_norm": 1.092360496520996, |
|
"learning_rate": 8.812034127965048e-05, |
|
"loss": 0.1994, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.6067677946324388, |
|
"grad_norm": 1.0622711181640625, |
|
"learning_rate": 8.791682939475438e-05, |
|
"loss": 0.2117, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.6103581366125124, |
|
"grad_norm": 0.722064733505249, |
|
"learning_rate": 8.771182837191613e-05, |
|
"loss": 0.2219, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.6139484785925859, |
|
"grad_norm": 0.602187991142273, |
|
"learning_rate": 8.750534626230475e-05, |
|
"loss": 0.2159, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.6175388205726595, |
|
"grad_norm": 0.7628340721130371, |
|
"learning_rate": 8.729739117525715e-05, |
|
"loss": 0.2088, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.6211291625527331, |
|
"grad_norm": 0.5262313485145569, |
|
"learning_rate": 8.708797127795963e-05, |
|
"loss": 0.2285, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.6247195045328068, |
|
"grad_norm": 0.6427643299102783, |
|
"learning_rate": 8.68770947951272e-05, |
|
"loss": 0.2094, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.6283098465128804, |
|
"grad_norm": 0.5874310731887817, |
|
"learning_rate": 8.666477000868046e-05, |
|
"loss": 0.2263, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.6319001884929539, |
|
"grad_norm": 0.561213493347168, |
|
"learning_rate": 8.645100525742042e-05, |
|
"loss": 0.2025, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.6354905304730275, |
|
"grad_norm": 0.7805958390235901, |
|
"learning_rate": 8.623580893670105e-05, |
|
"loss": 0.2171, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.6390808724531012, |
|
"grad_norm": 0.5806890726089478, |
|
"learning_rate": 8.601918949809937e-05, |
|
"loss": 0.2103, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.6426712144331748, |
|
"grad_norm": 0.581363320350647, |
|
"learning_rate": 8.580115544908374e-05, |
|
"loss": 0.2129, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.6462615564132483, |
|
"grad_norm": 0.4736599326133728, |
|
"learning_rate": 8.558171535267958e-05, |
|
"loss": 0.1993, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.6498518983933219, |
|
"grad_norm": 0.6482508778572083, |
|
"learning_rate": 8.536087782713318e-05, |
|
"loss": 0.193, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.6534422403733956, |
|
"grad_norm": 0.7920377850532532, |
|
"learning_rate": 8.513865154557315e-05, |
|
"loss": 0.1989, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.6570325823534692, |
|
"grad_norm": 0.7527133226394653, |
|
"learning_rate": 8.491504523566985e-05, |
|
"loss": 0.215, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.6606229243335427, |
|
"grad_norm": 0.8890761733055115, |
|
"learning_rate": 8.46900676792926e-05, |
|
"loss": 0.1972, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.6642132663136163, |
|
"grad_norm": 1.100785732269287, |
|
"learning_rate": 8.44637277121647e-05, |
|
"loss": 0.1958, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.66780360829369, |
|
"grad_norm": 0.6120195388793945, |
|
"learning_rate": 8.423603422351665e-05, |
|
"loss": 0.21, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.6713939502737636, |
|
"grad_norm": 0.9138973951339722, |
|
"learning_rate": 8.400699615573671e-05, |
|
"loss": 0.2144, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.6749842922538372, |
|
"grad_norm": 0.6855999827384949, |
|
"learning_rate": 8.377662250402e-05, |
|
"loss": 0.1949, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.6785746342339107, |
|
"grad_norm": 0.8468754291534424, |
|
"learning_rate": 8.354492231601505e-05, |
|
"loss": 0.207, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.6821649762139844, |
|
"grad_norm": 0.650043249130249, |
|
"learning_rate": 8.331190469146848e-05, |
|
"loss": 0.2029, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.685755318194058, |
|
"grad_norm": 0.7149790525436401, |
|
"learning_rate": 8.307757878186767e-05, |
|
"loss": 0.1891, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.6893456601741316, |
|
"grad_norm": 0.5650553703308105, |
|
"learning_rate": 8.284195379008137e-05, |
|
"loss": 0.2034, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.6929360021542051, |
|
"grad_norm": 0.8220282793045044, |
|
"learning_rate": 8.260503896999814e-05, |
|
"loss": 0.2004, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.6965263441342788, |
|
"grad_norm": 0.9552260041236877, |
|
"learning_rate": 8.236684362616307e-05, |
|
"loss": 0.2052, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.7001166861143524, |
|
"grad_norm": 0.643084704875946, |
|
"learning_rate": 8.212737711341223e-05, |
|
"loss": 0.2072, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.703707028094426, |
|
"grad_norm": 0.6681669354438782, |
|
"learning_rate": 8.188664883650537e-05, |
|
"loss": 0.1969, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.7072973700744996, |
|
"grad_norm": 1.1286799907684326, |
|
"learning_rate": 8.164466824975647e-05, |
|
"loss": 0.1964, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.7108877120545732, |
|
"grad_norm": 0.7001319527626038, |
|
"learning_rate": 8.14014448566625e-05, |
|
"loss": 0.1728, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.7144780540346468, |
|
"grad_norm": 0.8087079524993896, |
|
"learning_rate": 8.115698820953012e-05, |
|
"loss": 0.1879, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.7180683960147204, |
|
"grad_norm": 0.5888068079948425, |
|
"learning_rate": 8.091130790910065e-05, |
|
"loss": 0.2017, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.721658737994794, |
|
"grad_norm": 0.868241012096405, |
|
"learning_rate": 8.066441360417283e-05, |
|
"loss": 0.2002, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.7252490799748676, |
|
"grad_norm": 0.9173946976661682, |
|
"learning_rate": 8.041631499122399e-05, |
|
"loss": 0.1822, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.7288394219549412, |
|
"grad_norm": 0.7348050475120544, |
|
"learning_rate": 8.016702181402925e-05, |
|
"loss": 0.1822, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.7324297639350148, |
|
"grad_norm": 0.5974103808403015, |
|
"learning_rate": 7.991654386327877e-05, |
|
"loss": 0.1894, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.7360201059150884, |
|
"grad_norm": 1.2631843090057373, |
|
"learning_rate": 7.966489097619327e-05, |
|
"loss": 0.2005, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.7396104478951621, |
|
"grad_norm": 0.9306305050849915, |
|
"learning_rate": 7.941207303613773e-05, |
|
"loss": 0.2077, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.7432007898752356, |
|
"grad_norm": 0.6469571590423584, |
|
"learning_rate": 7.915809997223312e-05, |
|
"loss": 0.1893, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.7467911318553092, |
|
"grad_norm": 0.6804335713386536, |
|
"learning_rate": 7.89029817589665e-05, |
|
"loss": 0.1985, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.7503814738353828, |
|
"grad_norm": 0.6059459447860718, |
|
"learning_rate": 7.864672841579944e-05, |
|
"loss": 0.1856, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.7539718158154565, |
|
"grad_norm": 0.6755326390266418, |
|
"learning_rate": 7.838935000677419e-05, |
|
"loss": 0.1816, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.75756215779553, |
|
"grad_norm": 0.5813919901847839, |
|
"learning_rate": 7.813085664011873e-05, |
|
"loss": 0.1796, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.7611524997756036, |
|
"grad_norm": 0.9791029691696167, |
|
"learning_rate": 7.78712584678496e-05, |
|
"loss": 0.204, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.7647428417556772, |
|
"grad_norm": 0.6557776927947998, |
|
"learning_rate": 7.76105656853733e-05, |
|
"loss": 0.1897, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.7683331837357509, |
|
"grad_norm": 0.5696374177932739, |
|
"learning_rate": 7.73487885310858e-05, |
|
"loss": 0.1882, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.7719235257158245, |
|
"grad_norm": 0.704799473285675, |
|
"learning_rate": 7.708593728597046e-05, |
|
"loss": 0.186, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.775513867695898, |
|
"grad_norm": 0.9005138874053955, |
|
"learning_rate": 7.682202227319433e-05, |
|
"loss": 0.1938, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.7791042096759716, |
|
"grad_norm": 0.7679111957550049, |
|
"learning_rate": 7.655705385770258e-05, |
|
"loss": 0.182, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.7826945516560453, |
|
"grad_norm": 0.7027627229690552, |
|
"learning_rate": 7.629104244581156e-05, |
|
"loss": 0.1859, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.7862848936361189, |
|
"grad_norm": 0.8638216853141785, |
|
"learning_rate": 7.602399848480002e-05, |
|
"loss": 0.1945, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.7898752356161924, |
|
"grad_norm": 0.6846340894699097, |
|
"learning_rate": 7.575593246249885e-05, |
|
"loss": 0.1899, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.793465577596266, |
|
"grad_norm": 0.7671458721160889, |
|
"learning_rate": 7.548685490687919e-05, |
|
"loss": 0.1835, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.7970559195763397, |
|
"grad_norm": 1.7174897193908691, |
|
"learning_rate": 7.521677638563889e-05, |
|
"loss": 0.1742, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.8006462615564133, |
|
"grad_norm": 1.024430751800537, |
|
"learning_rate": 7.494570750578757e-05, |
|
"loss": 0.1827, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.8042366035364868, |
|
"grad_norm": 0.8393763303756714, |
|
"learning_rate": 7.467365891322995e-05, |
|
"loss": 0.1726, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.8078269455165604, |
|
"grad_norm": 3.184171438217163, |
|
"learning_rate": 7.440064129234783e-05, |
|
"loss": 0.1855, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.8114172874966341, |
|
"grad_norm": 0.7078256011009216, |
|
"learning_rate": 7.412666536558041e-05, |
|
"loss": 0.1783, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.8150076294767077, |
|
"grad_norm": 0.7265491485595703, |
|
"learning_rate": 7.385174189300323e-05, |
|
"loss": 0.19, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.8185979714567813, |
|
"grad_norm": 0.8136366605758667, |
|
"learning_rate": 7.35758816719055e-05, |
|
"loss": 0.1685, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.8221883134368548, |
|
"grad_norm": 1.0148855447769165, |
|
"learning_rate": 7.329909553636618e-05, |
|
"loss": 0.1781, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.8257786554169284, |
|
"grad_norm": 0.9568372964859009, |
|
"learning_rate": 7.302139435682831e-05, |
|
"loss": 0.1702, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.8293689973970021, |
|
"grad_norm": 1.8222324848175049, |
|
"learning_rate": 7.274278903967229e-05, |
|
"loss": 0.1823, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.8329593393770757, |
|
"grad_norm": 0.6024855375289917, |
|
"learning_rate": 7.246329052678736e-05, |
|
"loss": 0.1741, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.8365496813571492, |
|
"grad_norm": 0.9722542762756348, |
|
"learning_rate": 7.218290979514202e-05, |
|
"loss": 0.1757, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.8401400233372228, |
|
"grad_norm": 2.1216533184051514, |
|
"learning_rate": 7.190165785635273e-05, |
|
"loss": 0.1748, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.8437303653172965, |
|
"grad_norm": 0.6482483148574829, |
|
"learning_rate": 7.161954575625172e-05, |
|
"loss": 0.1799, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.8473207072973701, |
|
"grad_norm": 2.2838494777679443, |
|
"learning_rate": 7.133658457445291e-05, |
|
"loss": 0.1616, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.8509110492774437, |
|
"grad_norm": 0.6801573634147644, |
|
"learning_rate": 7.105278542391695e-05, |
|
"loss": 0.1806, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.8545013912575172, |
|
"grad_norm": 0.8442283272743225, |
|
"learning_rate": 7.076815945051465e-05, |
|
"loss": 0.1821, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.8580917332375909, |
|
"grad_norm": 1.1653680801391602, |
|
"learning_rate": 7.048271783258936e-05, |
|
"loss": 0.1773, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.8616820752176645, |
|
"grad_norm": 0.6987717151641846, |
|
"learning_rate": 7.019647178051779e-05, |
|
"loss": 0.1693, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.8652724171977381, |
|
"grad_norm": 0.6374627351760864, |
|
"learning_rate": 6.990943253626994e-05, |
|
"loss": 0.194, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.8688627591778116, |
|
"grad_norm": 0.6507960557937622, |
|
"learning_rate": 6.962161137296743e-05, |
|
"loss": 0.1568, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.8724531011578853, |
|
"grad_norm": 0.6699422597885132, |
|
"learning_rate": 6.933301959444082e-05, |
|
"loss": 0.1759, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.8760434431379589, |
|
"grad_norm": 0.48265889286994934, |
|
"learning_rate": 6.904366853478567e-05, |
|
"loss": 0.1735, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.8796337851180325, |
|
"grad_norm": 0.8710943460464478, |
|
"learning_rate": 6.875356955791735e-05, |
|
"loss": 0.1807, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.883224127098106, |
|
"grad_norm": 0.7356705069541931, |
|
"learning_rate": 6.846273405712483e-05, |
|
"loss": 0.1751, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.8868144690781797, |
|
"grad_norm": 0.6466989517211914, |
|
"learning_rate": 6.817117345462316e-05, |
|
"loss": 0.1599, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.8904048110582533, |
|
"grad_norm": 0.5134007334709167, |
|
"learning_rate": 6.787889920110488e-05, |
|
"loss": 0.1666, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.8939951530383269, |
|
"grad_norm": 0.471064954996109, |
|
"learning_rate": 6.75859227752903e-05, |
|
"loss": 0.1624, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.8975854950184005, |
|
"grad_norm": 0.606399655342102, |
|
"learning_rate": 6.729225568347677e-05, |
|
"loss": 0.1696, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.9011758369984741, |
|
"grad_norm": 0.6752104759216309, |
|
"learning_rate": 6.699790945908662e-05, |
|
"loss": 0.1607, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.9047661789785477, |
|
"grad_norm": 0.8237718939781189, |
|
"learning_rate": 6.670289566221437e-05, |
|
"loss": 0.1601, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.9083565209586213, |
|
"grad_norm": 0.7542670965194702, |
|
"learning_rate": 6.640722587917263e-05, |
|
"loss": 0.1608, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.9119468629386949, |
|
"grad_norm": 0.609646737575531, |
|
"learning_rate": 6.611091172203708e-05, |
|
"loss": 0.1586, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.9155372049187686, |
|
"grad_norm": 0.7793768644332886, |
|
"learning_rate": 6.581396482819038e-05, |
|
"loss": 0.1601, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.9191275468988421, |
|
"grad_norm": 0.9071997404098511, |
|
"learning_rate": 6.551639685986524e-05, |
|
"loss": 0.166, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.9227178888789157, |
|
"grad_norm": 1.0000146627426147, |
|
"learning_rate": 6.521821950368625e-05, |
|
"loss": 0.1702, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.9263082308589893, |
|
"grad_norm": 0.8889328241348267, |
|
"learning_rate": 6.491944447021102e-05, |
|
"loss": 0.1669, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.929898572839063, |
|
"grad_norm": 0.6329061985015869, |
|
"learning_rate": 6.462008349347022e-05, |
|
"loss": 0.1641, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.9334889148191365, |
|
"grad_norm": 0.7821244597434998, |
|
"learning_rate": 6.43201483305067e-05, |
|
"loss": 0.1643, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.9370792567992101, |
|
"grad_norm": 1.3463133573532104, |
|
"learning_rate": 6.401965076091382e-05, |
|
"loss": 0.1603, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.9406695987792837, |
|
"grad_norm": 2.534256935119629, |
|
"learning_rate": 6.371860258637278e-05, |
|
"loss": 0.1577, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.9442599407593574, |
|
"grad_norm": 0.9502484202384949, |
|
"learning_rate": 6.341701563018913e-05, |
|
"loss": 0.1529, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.947850282739431, |
|
"grad_norm": 0.5928242206573486, |
|
"learning_rate": 6.311490173682839e-05, |
|
"loss": 0.1633, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.9514406247195045, |
|
"grad_norm": 1.3390663862228394, |
|
"learning_rate": 6.281227277145093e-05, |
|
"loss": 0.1609, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.9550309666995781, |
|
"grad_norm": 0.8307391405105591, |
|
"learning_rate": 6.250914061944597e-05, |
|
"loss": 0.1654, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.9586213086796518, |
|
"grad_norm": 0.6453768610954285, |
|
"learning_rate": 6.220551718596477e-05, |
|
"loss": 0.1504, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.9622116506597254, |
|
"grad_norm": 0.9472678899765015, |
|
"learning_rate": 6.190141439545304e-05, |
|
"loss": 0.1441, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.9658019926397989, |
|
"grad_norm": 1.077405571937561, |
|
"learning_rate": 6.159684419118274e-05, |
|
"loss": 0.1574, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.9693923346198725, |
|
"grad_norm": 1.373565673828125, |
|
"learning_rate": 6.129181853478285e-05, |
|
"loss": 0.1557, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.9729826765999462, |
|
"grad_norm": 0.7159507274627686, |
|
"learning_rate": 6.0986349405769795e-05, |
|
"loss": 0.148, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.9765730185800198, |
|
"grad_norm": 0.7065421342849731, |
|
"learning_rate": 6.068044880107675e-05, |
|
"loss": 0.1481, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.9801633605600933, |
|
"grad_norm": 1.0575318336486816, |
|
"learning_rate": 6.0374128734582634e-05, |
|
"loss": 0.1546, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.9837537025401669, |
|
"grad_norm": 1.3331146240234375, |
|
"learning_rate": 6.006740123664022e-05, |
|
"loss": 0.1685, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.9873440445202406, |
|
"grad_norm": 0.712989091873169, |
|
"learning_rate": 5.976027835360366e-05, |
|
"loss": 0.1443, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.9909343865003142, |
|
"grad_norm": 0.9985840320587158, |
|
"learning_rate": 5.945277214735537e-05, |
|
"loss": 0.1381, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.9945247284803878, |
|
"grad_norm": 0.6109340786933899, |
|
"learning_rate": 5.914489469483234e-05, |
|
"loss": 0.1506, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.9981150704604613, |
|
"grad_norm": 0.5232493281364441, |
|
"learning_rate": 5.883665808755179e-05, |
|
"loss": 0.1527, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 1.001705412440535, |
|
"grad_norm": 1.120089054107666, |
|
"learning_rate": 5.852807443113635e-05, |
|
"loss": 0.1397, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 1.0052957544206085, |
|
"grad_norm": 0.9276136755943298, |
|
"learning_rate": 5.821915584483853e-05, |
|
"loss": 0.1155, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.008886096400682, |
|
"grad_norm": 0.6816973686218262, |
|
"learning_rate": 5.790991446106487e-05, |
|
"loss": 0.1111, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 1.0124764383807558, |
|
"grad_norm": 0.8138614296913147, |
|
"learning_rate": 5.7600362424899354e-05, |
|
"loss": 0.1107, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 1.0160667803608294, |
|
"grad_norm": 0.5443429350852966, |
|
"learning_rate": 5.729051189362649e-05, |
|
"loss": 0.1122, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 1.019657122340903, |
|
"grad_norm": 0.6204805970191956, |
|
"learning_rate": 5.698037503625379e-05, |
|
"loss": 0.1147, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 1.0232474643209766, |
|
"grad_norm": 0.5502025485038757, |
|
"learning_rate": 5.6669964033033905e-05, |
|
"loss": 0.1135, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.0268378063010501, |
|
"grad_norm": 0.6541283130645752, |
|
"learning_rate": 5.6359291074986244e-05, |
|
"loss": 0.1225, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 1.0304281482811237, |
|
"grad_norm": 0.6311090588569641, |
|
"learning_rate": 5.604836836341816e-05, |
|
"loss": 0.1063, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 1.0340184902611973, |
|
"grad_norm": 0.9657145738601685, |
|
"learning_rate": 5.573720810944575e-05, |
|
"loss": 0.1171, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 1.037608832241271, |
|
"grad_norm": 0.53743577003479, |
|
"learning_rate": 5.542582253351438e-05, |
|
"loss": 0.1128, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 1.0411991742213447, |
|
"grad_norm": 0.7501124739646912, |
|
"learning_rate": 5.511422386491858e-05, |
|
"loss": 0.1117, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.0447895162014182, |
|
"grad_norm": 0.7120064496994019, |
|
"learning_rate": 5.480242434132191e-05, |
|
"loss": 0.1049, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 1.0483798581814918, |
|
"grad_norm": 0.5755088329315186, |
|
"learning_rate": 5.4490436208276194e-05, |
|
"loss": 0.1047, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 1.0519702001615654, |
|
"grad_norm": 0.8773960471153259, |
|
"learning_rate": 5.4178271718740744e-05, |
|
"loss": 0.1119, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 1.055560542141639, |
|
"grad_norm": 0.5922686457633972, |
|
"learning_rate": 5.3865943132601e-05, |
|
"loss": 0.1092, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 1.0591508841217125, |
|
"grad_norm": 0.7486307621002197, |
|
"learning_rate": 5.355346271618715e-05, |
|
"loss": 0.1068, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 1.0627412261017861, |
|
"grad_norm": 0.8534032702445984, |
|
"learning_rate": 5.324084274179228e-05, |
|
"loss": 0.1072, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 1.0663315680818597, |
|
"grad_norm": 0.7270232439041138, |
|
"learning_rate": 5.292809548719049e-05, |
|
"loss": 0.1101, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 1.0699219100619335, |
|
"grad_norm": 0.5195777416229248, |
|
"learning_rate": 5.2615233235154616e-05, |
|
"loss": 0.1084, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 1.073512252042007, |
|
"grad_norm": 0.5684207081794739, |
|
"learning_rate": 5.230226827297395e-05, |
|
"loss": 0.1026, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 1.0771025940220806, |
|
"grad_norm": 1.3543568849563599, |
|
"learning_rate": 5.198921289197153e-05, |
|
"loss": 0.1026, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.0806929360021542, |
|
"grad_norm": 0.7514908313751221, |
|
"learning_rate": 5.167607938702154e-05, |
|
"loss": 0.1085, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 1.0842832779822278, |
|
"grad_norm": 0.6683730483055115, |
|
"learning_rate": 5.136288005606631e-05, |
|
"loss": 0.1012, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 1.0878736199623014, |
|
"grad_norm": 0.5652278065681458, |
|
"learning_rate": 5.1049627199633496e-05, |
|
"loss": 0.119, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 1.091463961942375, |
|
"grad_norm": 0.7017742395401001, |
|
"learning_rate": 5.073633312035287e-05, |
|
"loss": 0.1057, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 1.0950543039224485, |
|
"grad_norm": 0.5066478848457336, |
|
"learning_rate": 5.042301012247317e-05, |
|
"loss": 0.1127, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 1.0986446459025223, |
|
"grad_norm": 0.535321056842804, |
|
"learning_rate": 5.010967051137887e-05, |
|
"loss": 0.1102, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 1.1022349878825959, |
|
"grad_norm": 0.6270662546157837, |
|
"learning_rate": 4.979632659310695e-05, |
|
"loss": 0.1008, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 1.1058253298626695, |
|
"grad_norm": 0.748859703540802, |
|
"learning_rate": 4.9482990673863485e-05, |
|
"loss": 0.0995, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 1.109415671842743, |
|
"grad_norm": 0.500746488571167, |
|
"learning_rate": 4.916967505954046e-05, |
|
"loss": 0.1056, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 1.1130060138228166, |
|
"grad_norm": 0.5748748183250427, |
|
"learning_rate": 4.885639205523239e-05, |
|
"loss": 0.106, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.1165963558028902, |
|
"grad_norm": 0.593147337436676, |
|
"learning_rate": 4.854315396475304e-05, |
|
"loss": 0.1086, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 1.1201866977829638, |
|
"grad_norm": 0.6119722127914429, |
|
"learning_rate": 4.822997309015226e-05, |
|
"loss": 0.1035, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 1.1237770397630373, |
|
"grad_norm": 0.5296047925949097, |
|
"learning_rate": 4.7916861731232846e-05, |
|
"loss": 0.1083, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 1.127367381743111, |
|
"grad_norm": 0.7060047388076782, |
|
"learning_rate": 4.7603832185067416e-05, |
|
"loss": 0.1, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 1.1309577237231847, |
|
"grad_norm": 0.4993881583213806, |
|
"learning_rate": 4.729089674551547e-05, |
|
"loss": 0.1057, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 1.1345480657032583, |
|
"grad_norm": 0.7866911888122559, |
|
"learning_rate": 4.697806770274062e-05, |
|
"loss": 0.0997, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 1.1381384076833319, |
|
"grad_norm": 0.642524242401123, |
|
"learning_rate": 4.6665357342727865e-05, |
|
"loss": 0.1051, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 1.1417287496634054, |
|
"grad_norm": 0.5228136777877808, |
|
"learning_rate": 4.6352777946801094e-05, |
|
"loss": 0.1002, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 1.145319091643479, |
|
"grad_norm": 0.9493293762207031, |
|
"learning_rate": 4.604034179114067e-05, |
|
"loss": 0.1019, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 1.1489094336235526, |
|
"grad_norm": 0.5647363662719727, |
|
"learning_rate": 4.5728061146301476e-05, |
|
"loss": 0.0915, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.1524997756036262, |
|
"grad_norm": 0.6017284989356995, |
|
"learning_rate": 4.5415948276730805e-05, |
|
"loss": 0.1098, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 1.1560901175837, |
|
"grad_norm": 0.46670928597450256, |
|
"learning_rate": 4.5104015440286826e-05, |
|
"loss": 0.1056, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 1.1596804595637735, |
|
"grad_norm": 0.6661453247070312, |
|
"learning_rate": 4.479227488775707e-05, |
|
"loss": 0.0964, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 1.163270801543847, |
|
"grad_norm": 0.642352819442749, |
|
"learning_rate": 4.4480738862377444e-05, |
|
"loss": 0.0907, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 1.1668611435239207, |
|
"grad_norm": 2.4927215576171875, |
|
"learning_rate": 4.4169419599351186e-05, |
|
"loss": 0.0969, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 1.1704514855039942, |
|
"grad_norm": 0.5965277552604675, |
|
"learning_rate": 4.3858329325368536e-05, |
|
"loss": 0.0921, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 1.1740418274840678, |
|
"grad_norm": 0.503105103969574, |
|
"learning_rate": 4.354748025812639e-05, |
|
"loss": 0.0918, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 1.1776321694641414, |
|
"grad_norm": 2.0070412158966064, |
|
"learning_rate": 4.323688460584864e-05, |
|
"loss": 0.1008, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 1.181222511444215, |
|
"grad_norm": 0.5921032428741455, |
|
"learning_rate": 4.292655456680651e-05, |
|
"loss": 0.0992, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 1.1848128534242885, |
|
"grad_norm": 0.7106916308403015, |
|
"learning_rate": 4.261650232883965e-05, |
|
"loss": 0.0998, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.1884031954043623, |
|
"grad_norm": 0.7483718395233154, |
|
"learning_rate": 4.230674006887734e-05, |
|
"loss": 0.1007, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 1.191993537384436, |
|
"grad_norm": 0.5854814648628235, |
|
"learning_rate": 4.199727995246041e-05, |
|
"loss": 0.1001, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 1.1955838793645095, |
|
"grad_norm": 1.022163987159729, |
|
"learning_rate": 4.1688134133263285e-05, |
|
"loss": 0.0989, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 1.199174221344583, |
|
"grad_norm": 0.6698512434959412, |
|
"learning_rate": 4.1379314752616784e-05, |
|
"loss": 0.0929, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 1.2027645633246566, |
|
"grad_norm": 0.8445412516593933, |
|
"learning_rate": 4.107083393903126e-05, |
|
"loss": 0.0865, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 1.2063549053047302, |
|
"grad_norm": 0.9410879611968994, |
|
"learning_rate": 4.076270380772021e-05, |
|
"loss": 0.0942, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 1.2099452472848038, |
|
"grad_norm": 0.4104284346103668, |
|
"learning_rate": 4.04549364601245e-05, |
|
"loss": 0.0957, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 1.2135355892648776, |
|
"grad_norm": 0.8418083786964417, |
|
"learning_rate": 4.014754398343716e-05, |
|
"loss": 0.0925, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 1.2171259312449512, |
|
"grad_norm": 0.5773093700408936, |
|
"learning_rate": 3.984053845012858e-05, |
|
"loss": 0.0921, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 1.2207162732250247, |
|
"grad_norm": 1.2288339138031006, |
|
"learning_rate": 3.953393191747239e-05, |
|
"loss": 0.089, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.2243066152050983, |
|
"grad_norm": 0.5901492238044739, |
|
"learning_rate": 3.9227736427071995e-05, |
|
"loss": 0.0903, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 1.2278969571851719, |
|
"grad_norm": 0.6220996379852295, |
|
"learning_rate": 3.892196400438755e-05, |
|
"loss": 0.0958, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 1.2314872991652455, |
|
"grad_norm": 0.6737645864486694, |
|
"learning_rate": 3.8616626658263825e-05, |
|
"loss": 0.0892, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 1.235077641145319, |
|
"grad_norm": 0.5661391019821167, |
|
"learning_rate": 3.831173638045839e-05, |
|
"loss": 0.0888, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 1.2386679831253926, |
|
"grad_norm": 0.7712500095367432, |
|
"learning_rate": 3.800730514517077e-05, |
|
"loss": 0.0859, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 1.2422583251054662, |
|
"grad_norm": 0.7590687274932861, |
|
"learning_rate": 3.770334490857217e-05, |
|
"loss": 0.0868, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 1.24584866708554, |
|
"grad_norm": 0.5650063753128052, |
|
"learning_rate": 3.7399867608335895e-05, |
|
"loss": 0.0974, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 1.2494390090656136, |
|
"grad_norm": 0.8975266218185425, |
|
"learning_rate": 3.709688516316844e-05, |
|
"loss": 0.095, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 1.2530293510456871, |
|
"grad_norm": 0.5311192274093628, |
|
"learning_rate": 3.679440947234152e-05, |
|
"loss": 0.0925, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 1.2566196930257607, |
|
"grad_norm": 1.0144147872924805, |
|
"learning_rate": 3.649245241522468e-05, |
|
"loss": 0.0903, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.2602100350058343, |
|
"grad_norm": 0.6833083629608154, |
|
"learning_rate": 3.619102585081872e-05, |
|
"loss": 0.0929, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 1.2638003769859079, |
|
"grad_norm": 0.6380596160888672, |
|
"learning_rate": 3.589014161728999e-05, |
|
"loss": 0.0787, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 1.2673907189659814, |
|
"grad_norm": 0.7181170582771301, |
|
"learning_rate": 3.558981153150542e-05, |
|
"loss": 0.0859, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 1.2709810609460552, |
|
"grad_norm": 0.6842727661132812, |
|
"learning_rate": 3.529004738856853e-05, |
|
"loss": 0.0823, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 1.2745714029261288, |
|
"grad_norm": 1.5806798934936523, |
|
"learning_rate": 3.4990860961356044e-05, |
|
"loss": 0.085, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 1.2781617449062024, |
|
"grad_norm": 0.6149685978889465, |
|
"learning_rate": 3.4692264000055594e-05, |
|
"loss": 0.0818, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 1.281752086886276, |
|
"grad_norm": 0.797741174697876, |
|
"learning_rate": 3.4394268231704266e-05, |
|
"loss": 0.0787, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 1.2853424288663495, |
|
"grad_norm": 0.5583544373512268, |
|
"learning_rate": 3.4096885359728036e-05, |
|
"loss": 0.0879, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 1.288932770846423, |
|
"grad_norm": 1.2549068927764893, |
|
"learning_rate": 3.380012706348209e-05, |
|
"loss": 0.085, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 1.2925231128264967, |
|
"grad_norm": 0.56533282995224, |
|
"learning_rate": 3.350400499779214e-05, |
|
"loss": 0.0932, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.2961134548065703, |
|
"grad_norm": 0.9718196392059326, |
|
"learning_rate": 3.32085307924967e-05, |
|
"loss": 0.0901, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 1.2997037967866438, |
|
"grad_norm": 0.6769024133682251, |
|
"learning_rate": 3.2913716051990394e-05, |
|
"loss": 0.0845, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 1.3032941387667174, |
|
"grad_norm": 1.1620076894760132, |
|
"learning_rate": 3.261957235476813e-05, |
|
"loss": 0.0831, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 1.3068844807467912, |
|
"grad_norm": 0.5092564225196838, |
|
"learning_rate": 3.232611125297035e-05, |
|
"loss": 0.0804, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 1.3104748227268648, |
|
"grad_norm": 0.42432501912117004, |
|
"learning_rate": 3.2033344271929476e-05, |
|
"loss": 0.0866, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 1.3140651647069383, |
|
"grad_norm": 0.5998629331588745, |
|
"learning_rate": 3.17412829097171e-05, |
|
"loss": 0.0865, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 1.317655506687012, |
|
"grad_norm": 0.5421279072761536, |
|
"learning_rate": 3.144993863669251e-05, |
|
"loss": 0.0849, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 1.3212458486670855, |
|
"grad_norm": 0.6406755447387695, |
|
"learning_rate": 3.115932289505213e-05, |
|
"loss": 0.0814, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 1.324836190647159, |
|
"grad_norm": 0.9076423048973083, |
|
"learning_rate": 3.086944709838028e-05, |
|
"loss": 0.0898, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 1.3284265326272329, |
|
"grad_norm": 0.7807140350341797, |
|
"learning_rate": 3.0580322631200756e-05, |
|
"loss": 0.0828, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.3320168746073064, |
|
"grad_norm": 0.6127801537513733, |
|
"learning_rate": 3.029196084852981e-05, |
|
"loss": 0.08, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 1.33560721658738, |
|
"grad_norm": 0.6226149797439575, |
|
"learning_rate": 3.000437307543017e-05, |
|
"loss": 0.0774, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 1.3391975585674536, |
|
"grad_norm": 0.4141993820667267, |
|
"learning_rate": 2.9717570606566287e-05, |
|
"loss": 0.0817, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 1.3427879005475272, |
|
"grad_norm": 0.6416285634040833, |
|
"learning_rate": 2.943156470576073e-05, |
|
"loss": 0.0792, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 1.3463782425276007, |
|
"grad_norm": 0.6912229657173157, |
|
"learning_rate": 2.914636660555178e-05, |
|
"loss": 0.0743, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.3499685845076743, |
|
"grad_norm": 0.8113506436347961, |
|
"learning_rate": 2.886198750675233e-05, |
|
"loss": 0.0843, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 1.353558926487748, |
|
"grad_norm": 0.6693570613861084, |
|
"learning_rate": 2.8578438578010053e-05, |
|
"loss": 0.0718, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 1.3571492684678215, |
|
"grad_norm": 0.6286030411720276, |
|
"learning_rate": 2.8295730955368573e-05, |
|
"loss": 0.0821, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 1.360739610447895, |
|
"grad_norm": 0.5432600975036621, |
|
"learning_rate": 2.8013875741830264e-05, |
|
"loss": 0.0779, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 1.3643299524279688, |
|
"grad_norm": 0.5628815293312073, |
|
"learning_rate": 2.7732884006920225e-05, |
|
"loss": 0.076, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.3679202944080424, |
|
"grad_norm": 0.761500895023346, |
|
"learning_rate": 2.745276678625141e-05, |
|
"loss": 0.0869, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 1.371510636388116, |
|
"grad_norm": 0.5888515710830688, |
|
"learning_rate": 2.717353508109125e-05, |
|
"loss": 0.0812, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 1.3751009783681896, |
|
"grad_norm": 0.5477086305618286, |
|
"learning_rate": 2.6895199857929643e-05, |
|
"loss": 0.0772, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 1.3786913203482631, |
|
"grad_norm": 0.5078212022781372, |
|
"learning_rate": 2.6617772048048284e-05, |
|
"loss": 0.0707, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 1.3822816623283367, |
|
"grad_norm": 0.5893701910972595, |
|
"learning_rate": 2.634126254709125e-05, |
|
"loss": 0.081, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 1.3858720043084105, |
|
"grad_norm": 0.9726279973983765, |
|
"learning_rate": 2.6065682214637123e-05, |
|
"loss": 0.0868, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 1.389462346288484, |
|
"grad_norm": 0.5375906229019165, |
|
"learning_rate": 2.5791041873772513e-05, |
|
"loss": 0.0754, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 1.3930526882685577, |
|
"grad_norm": 0.5937024354934692, |
|
"learning_rate": 2.5517352310667053e-05, |
|
"loss": 0.07, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 1.3966430302486312, |
|
"grad_norm": 0.5695418119430542, |
|
"learning_rate": 2.524462427414967e-05, |
|
"loss": 0.0712, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 1.4002333722287048, |
|
"grad_norm": 0.6219804883003235, |
|
"learning_rate": 2.497286847528646e-05, |
|
"loss": 0.0771, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.4038237142087784, |
|
"grad_norm": 0.7533654570579529, |
|
"learning_rate": 2.4702095586960085e-05, |
|
"loss": 0.073, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 1.407414056188852, |
|
"grad_norm": 0.5750814080238342, |
|
"learning_rate": 2.443231624345061e-05, |
|
"loss": 0.0753, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 1.4110043981689255, |
|
"grad_norm": 0.5853593349456787, |
|
"learning_rate": 2.416354104001779e-05, |
|
"loss": 0.0754, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 1.414594740148999, |
|
"grad_norm": 0.4552966356277466, |
|
"learning_rate": 2.389578053248493e-05, |
|
"loss": 0.0753, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 1.4181850821290727, |
|
"grad_norm": 0.718437671661377, |
|
"learning_rate": 2.362904523682447e-05, |
|
"loss": 0.0758, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 1.4217754241091463, |
|
"grad_norm": 0.7326009273529053, |
|
"learning_rate": 2.3363345628744832e-05, |
|
"loss": 0.0756, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 1.42536576608922, |
|
"grad_norm": 0.9607858657836914, |
|
"learning_rate": 2.3098692143279066e-05, |
|
"loss": 0.0719, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 1.4289561080692936, |
|
"grad_norm": 0.7754957675933838, |
|
"learning_rate": 2.283509517437496e-05, |
|
"loss": 0.0717, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 1.4325464500493672, |
|
"grad_norm": 0.8900684714317322, |
|
"learning_rate": 2.2572565074486972e-05, |
|
"loss": 0.0757, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 1.4361367920294408, |
|
"grad_norm": 0.6538607478141785, |
|
"learning_rate": 2.2311112154169507e-05, |
|
"loss": 0.0709, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.4397271340095144, |
|
"grad_norm": 0.6442373991012573, |
|
"learning_rate": 2.2050746681672056e-05, |
|
"loss": 0.0736, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 1.443317475989588, |
|
"grad_norm": 0.9824745655059814, |
|
"learning_rate": 2.179147888253584e-05, |
|
"loss": 0.0741, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 1.4469078179696617, |
|
"grad_norm": 0.6084447503089905, |
|
"learning_rate": 2.1533318939192394e-05, |
|
"loss": 0.0675, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 1.4504981599497353, |
|
"grad_norm": 0.6071482300758362, |
|
"learning_rate": 2.127627699056345e-05, |
|
"loss": 0.0721, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 1.4540885019298089, |
|
"grad_norm": 0.5101909637451172, |
|
"learning_rate": 2.102036313166289e-05, |
|
"loss": 0.0691, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 1.4576788439098824, |
|
"grad_norm": 0.5907676815986633, |
|
"learning_rate": 2.076558741320016e-05, |
|
"loss": 0.0624, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 1.461269185889956, |
|
"grad_norm": 0.7201829552650452, |
|
"learning_rate": 2.0511959841185713e-05, |
|
"loss": 0.0749, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 1.4648595278700296, |
|
"grad_norm": 0.5254886150360107, |
|
"learning_rate": 2.0259490376537865e-05, |
|
"loss": 0.078, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 1.4684498698501032, |
|
"grad_norm": 0.4855566620826721, |
|
"learning_rate": 2.0008188934691614e-05, |
|
"loss": 0.0727, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 1.4720402118301767, |
|
"grad_norm": 0.68084716796875, |
|
"learning_rate": 1.975806538520937e-05, |
|
"loss": 0.0679, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 1.4756305538102503, |
|
"grad_norm": 0.5893229842185974, |
|
"learning_rate": 1.9509129551393145e-05, |
|
"loss": 0.0709, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 1.479220895790324, |
|
"grad_norm": 0.5513525605201721, |
|
"learning_rate": 1.9261391209898912e-05, |
|
"loss": 0.0664, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 1.4828112377703977, |
|
"grad_norm": 0.45056793093681335, |
|
"learning_rate": 1.9014860090352476e-05, |
|
"loss": 0.0635, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 1.4864015797504713, |
|
"grad_norm": 0.6190094947814941, |
|
"learning_rate": 1.8769545874967566e-05, |
|
"loss": 0.0693, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 1.4899919217305448, |
|
"grad_norm": 0.6586858034133911, |
|
"learning_rate": 1.852545819816539e-05, |
|
"loss": 0.0652, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 1.4935822637106184, |
|
"grad_norm": 0.9752713441848755, |
|
"learning_rate": 1.8282606646196353e-05, |
|
"loss": 0.0744, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 1.497172605690692, |
|
"grad_norm": 0.6681696176528931, |
|
"learning_rate": 1.8041000756763493e-05, |
|
"loss": 0.0671, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 1.5007629476707658, |
|
"grad_norm": 0.5906854867935181, |
|
"learning_rate": 1.7800650018648024e-05, |
|
"loss": 0.0736, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 1.5043532896508394, |
|
"grad_norm": 0.6534956097602844, |
|
"learning_rate": 1.7561563871336545e-05, |
|
"loss": 0.0674, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 1.507943631630913, |
|
"grad_norm": 0.5932891964912415, |
|
"learning_rate": 1.732375170465041e-05, |
|
"loss": 0.0672, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.5115339736109865, |
|
"grad_norm": 0.504921019077301, |
|
"learning_rate": 1.7087222858376834e-05, |
|
"loss": 0.07, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 1.51512431559106, |
|
"grad_norm": 0.6252205967903137, |
|
"learning_rate": 1.6851986621902265e-05, |
|
"loss": 0.0637, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 1.5187146575711337, |
|
"grad_norm": 0.47223180532455444, |
|
"learning_rate": 1.6618052233847404e-05, |
|
"loss": 0.0697, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 1.5223049995512072, |
|
"grad_norm": 0.4429969787597656, |
|
"learning_rate": 1.6385428881704405e-05, |
|
"loss": 0.0664, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 1.5258953415312808, |
|
"grad_norm": 0.44724294543266296, |
|
"learning_rate": 1.6154125701476092e-05, |
|
"loss": 0.0642, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 1.5294856835113544, |
|
"grad_norm": 0.49648982286453247, |
|
"learning_rate": 1.59241517773171e-05, |
|
"loss": 0.0616, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 1.533076025491428, |
|
"grad_norm": 0.3683583736419678, |
|
"learning_rate": 1.5695516141177142e-05, |
|
"loss": 0.0631, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 1.5366663674715015, |
|
"grad_norm": 0.7180688977241516, |
|
"learning_rate": 1.546822777244627e-05, |
|
"loss": 0.0658, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 1.5402567094515751, |
|
"grad_norm": 0.6510112881660461, |
|
"learning_rate": 1.5242295597602225e-05, |
|
"loss": 0.0624, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 1.543847051431649, |
|
"grad_norm": 0.6626403331756592, |
|
"learning_rate": 1.5017728489859862e-05, |
|
"loss": 0.0596, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 1.5474373934117225, |
|
"grad_norm": 0.7510163187980652, |
|
"learning_rate": 1.4794535268822673e-05, |
|
"loss": 0.0666, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 1.551027735391796, |
|
"grad_norm": 0.48777294158935547, |
|
"learning_rate": 1.4572724700136386e-05, |
|
"loss": 0.0623, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 1.5546180773718696, |
|
"grad_norm": 0.6740663647651672, |
|
"learning_rate": 1.4352305495144736e-05, |
|
"loss": 0.0699, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 1.5582084193519434, |
|
"grad_norm": 0.513523519039154, |
|
"learning_rate": 1.4133286310547294e-05, |
|
"loss": 0.0686, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 1.561798761332017, |
|
"grad_norm": 0.689508318901062, |
|
"learning_rate": 1.3915675748059537e-05, |
|
"loss": 0.0643, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 1.5653891033120906, |
|
"grad_norm": 0.7558987736701965, |
|
"learning_rate": 1.3699482354074989e-05, |
|
"loss": 0.0638, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 1.5689794452921642, |
|
"grad_norm": 1.4819414615631104, |
|
"learning_rate": 1.3484714619329574e-05, |
|
"loss": 0.0579, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 1.5725697872722377, |
|
"grad_norm": 0.45672255754470825, |
|
"learning_rate": 1.3271380978568187e-05, |
|
"loss": 0.0597, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 1.5761601292523113, |
|
"grad_norm": 0.7070518136024475, |
|
"learning_rate": 1.3059489810213371e-05, |
|
"loss": 0.0653, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 1.5797504712323849, |
|
"grad_norm": 0.4744075536727905, |
|
"learning_rate": 1.2849049436036326e-05, |
|
"loss": 0.0609, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.5833408132124585, |
|
"grad_norm": 0.5028963088989258, |
|
"learning_rate": 1.2640068120830035e-05, |
|
"loss": 0.0614, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 1.586931155192532, |
|
"grad_norm": 1.222612977027893, |
|
"learning_rate": 1.24325540720847e-05, |
|
"loss": 0.058, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 1.5905214971726056, |
|
"grad_norm": 0.4024209976196289, |
|
"learning_rate": 1.2226515439665392e-05, |
|
"loss": 0.0599, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 1.5941118391526792, |
|
"grad_norm": 0.5114520788192749, |
|
"learning_rate": 1.2021960315491975e-05, |
|
"loss": 0.0525, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 1.5977021811327528, |
|
"grad_norm": 0.6782193779945374, |
|
"learning_rate": 1.1818896733221318e-05, |
|
"loss": 0.0605, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 1.6012925231128265, |
|
"grad_norm": 0.4370103180408478, |
|
"learning_rate": 1.1617332667931763e-05, |
|
"loss": 0.0569, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 1.6048828650929001, |
|
"grad_norm": 0.5159808993339539, |
|
"learning_rate": 1.1417276035809926e-05, |
|
"loss": 0.0583, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 1.6084732070729737, |
|
"grad_norm": 0.45791277289390564, |
|
"learning_rate": 1.1218734693839794e-05, |
|
"loss": 0.0639, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 1.6120635490530473, |
|
"grad_norm": 0.6834966540336609, |
|
"learning_rate": 1.1021716439494156e-05, |
|
"loss": 0.0626, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 1.615653891033121, |
|
"grad_norm": 0.4611278176307678, |
|
"learning_rate": 1.0826229010428369e-05, |
|
"loss": 0.056, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.6192442330131946, |
|
"grad_norm": 0.6188788414001465, |
|
"learning_rate": 1.0632280084176444e-05, |
|
"loss": 0.0578, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 1.6228345749932682, |
|
"grad_norm": 0.5647935271263123, |
|
"learning_rate": 1.0439877277849575e-05, |
|
"loss": 0.0586, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 1.6264249169733418, |
|
"grad_norm": 0.6752751469612122, |
|
"learning_rate": 1.024902814783692e-05, |
|
"loss": 0.0555, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 1.6300152589534154, |
|
"grad_norm": 0.49796855449676514, |
|
"learning_rate": 1.0059740189508881e-05, |
|
"loss": 0.0556, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 1.633605600933489, |
|
"grad_norm": 0.6069309115409851, |
|
"learning_rate": 9.872020836922724e-06, |
|
"loss": 0.0564, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 1.6371959429135625, |
|
"grad_norm": 0.6443465948104858, |
|
"learning_rate": 9.68587746253059e-06, |
|
"loss": 0.0559, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 1.640786284893636, |
|
"grad_norm": 0.48786768317222595, |
|
"learning_rate": 9.501317376889985e-06, |
|
"loss": 0.0551, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 1.6443766268737097, |
|
"grad_norm": 0.6036781072616577, |
|
"learning_rate": 9.318347828376639e-06, |
|
"loss": 0.06, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 1.6479669688537832, |
|
"grad_norm": 0.7226144075393677, |
|
"learning_rate": 9.136976002899855e-06, |
|
"loss": 0.0616, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 1.6515573108338568, |
|
"grad_norm": 0.4328902065753937, |
|
"learning_rate": 8.957209023620277e-06, |
|
"loss": 0.0504, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 1.6551476528139304, |
|
"grad_norm": 0.506410539150238, |
|
"learning_rate": 8.779053950670146e-06, |
|
"loss": 0.059, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 1.658737994794004, |
|
"grad_norm": 0.6660659909248352, |
|
"learning_rate": 8.602517780876007e-06, |
|
"loss": 0.0528, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 1.6623283367740778, |
|
"grad_norm": 0.5838719606399536, |
|
"learning_rate": 8.427607447483943e-06, |
|
"loss": 0.0561, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 1.6659186787541513, |
|
"grad_norm": 0.7501543760299683, |
|
"learning_rate": 8.254329819887252e-06, |
|
"loss": 0.0527, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 1.669509020734225, |
|
"grad_norm": 0.4832637906074524, |
|
"learning_rate": 8.082691703356688e-06, |
|
"loss": 0.0512, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 1.6730993627142985, |
|
"grad_norm": 0.5931252241134644, |
|
"learning_rate": 7.912699838773151e-06, |
|
"loss": 0.0513, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 1.6766897046943723, |
|
"grad_norm": 0.5244051218032837, |
|
"learning_rate": 7.744360902363002e-06, |
|
"loss": 0.0544, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 1.6802800466744459, |
|
"grad_norm": 0.6513102054595947, |
|
"learning_rate": 7.577681505435813e-06, |
|
"loss": 0.054, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 1.6838703886545194, |
|
"grad_norm": 0.8317810297012329, |
|
"learning_rate": 7.412668194124728e-06, |
|
"loss": 0.0507, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 1.687460730634593, |
|
"grad_norm": 0.4875124394893646, |
|
"learning_rate": 7.2493274491294285e-06, |
|
"loss": 0.0488, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 1.6910510726146666, |
|
"grad_norm": 0.4913179576396942, |
|
"learning_rate": 7.087665685461497e-06, |
|
"loss": 0.0551, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 1.6946414145947402, |
|
"grad_norm": 0.47164708375930786, |
|
"learning_rate": 6.9276892521925816e-06, |
|
"loss": 0.0548, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 1.6982317565748137, |
|
"grad_norm": 0.39257460832595825, |
|
"learning_rate": 6.769404432204973e-06, |
|
"loss": 0.0532, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 1.7018220985548873, |
|
"grad_norm": 0.548692524433136, |
|
"learning_rate": 6.61281744194494e-06, |
|
"loss": 0.0503, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 1.7054124405349609, |
|
"grad_norm": 0.476531445980072, |
|
"learning_rate": 6.4579344311784475e-06, |
|
"loss": 0.0514, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 1.7090027825150345, |
|
"grad_norm": 0.47037366032600403, |
|
"learning_rate": 6.304761482749777e-06, |
|
"loss": 0.0497, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 1.712593124495108, |
|
"grad_norm": 0.7144917845726013, |
|
"learning_rate": 6.153304612342514e-06, |
|
"loss": 0.0529, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 1.7161834664751816, |
|
"grad_norm": 0.7041458487510681, |
|
"learning_rate": 6.003569768243411e-06, |
|
"loss": 0.0493, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 1.7197738084552554, |
|
"grad_norm": 0.5702252984046936, |
|
"learning_rate": 5.855562831108624e-06, |
|
"loss": 0.0491, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 1.723364150435329, |
|
"grad_norm": 0.697307288646698, |
|
"learning_rate": 5.709289613732888e-06, |
|
"loss": 0.0533, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 1.7269544924154026, |
|
"grad_norm": 0.6015498638153076, |
|
"learning_rate": 5.564755860821147e-06, |
|
"loss": 0.0521, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 1.7305448343954761, |
|
"grad_norm": 0.6062167882919312, |
|
"learning_rate": 5.421967248763021e-06, |
|
"loss": 0.0547, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 1.73413517637555, |
|
"grad_norm": 0.45276394486427307, |
|
"learning_rate": 5.2809293854097495e-06, |
|
"loss": 0.0553, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 1.7377255183556235, |
|
"grad_norm": 0.4024350047111511, |
|
"learning_rate": 5.14164780985405e-06, |
|
"loss": 0.0512, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 1.741315860335697, |
|
"grad_norm": 0.6370827555656433, |
|
"learning_rate": 5.0041279922125705e-06, |
|
"loss": 0.0562, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 1.7449062023157706, |
|
"grad_norm": 0.5606709122657776, |
|
"learning_rate": 4.868375333411002e-06, |
|
"loss": 0.0556, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 1.7484965442958442, |
|
"grad_norm": 0.8585699796676636, |
|
"learning_rate": 4.734395164971978e-06, |
|
"loss": 0.0459, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 1.7520868862759178, |
|
"grad_norm": 0.4308234453201294, |
|
"learning_rate": 4.6021927488057334e-06, |
|
"loss": 0.0471, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 1.7556772282559914, |
|
"grad_norm": 0.4660848081111908, |
|
"learning_rate": 4.471773277003427e-06, |
|
"loss": 0.0524, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 1.759267570236065, |
|
"grad_norm": 0.6825345158576965, |
|
"learning_rate": 4.343141871633188e-06, |
|
"loss": 0.0521, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 1.7628579122161385, |
|
"grad_norm": 0.6137758493423462, |
|
"learning_rate": 4.216303584538988e-06, |
|
"loss": 0.0539, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 1.766448254196212, |
|
"grad_norm": 0.7231915593147278, |
|
"learning_rate": 4.0912633971422425e-06, |
|
"loss": 0.0466, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 1.7700385961762857, |
|
"grad_norm": 0.6705979108810425, |
|
"learning_rate": 3.968026220246174e-06, |
|
"loss": 0.047, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 1.7736289381563592, |
|
"grad_norm": 0.5974612832069397, |
|
"learning_rate": 3.846596893842891e-06, |
|
"loss": 0.0499, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 1.777219280136433, |
|
"grad_norm": 0.6848942637443542, |
|
"learning_rate": 3.7269801869233845e-06, |
|
"loss": 0.0545, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 1.7808096221165066, |
|
"grad_norm": 0.6268109083175659, |
|
"learning_rate": 3.6091807972901624e-06, |
|
"loss": 0.0519, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 1.7843999640965802, |
|
"grad_norm": 0.8246615529060364, |
|
"learning_rate": 3.49320335137282e-06, |
|
"loss": 0.0495, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 1.7879903060766538, |
|
"grad_norm": 0.7163103222846985, |
|
"learning_rate": 3.3790524040462566e-06, |
|
"loss": 0.0465, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 1.7915806480567276, |
|
"grad_norm": 0.5779036283493042, |
|
"learning_rate": 3.266732438451842e-06, |
|
"loss": 0.0493, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 1.7951709900368011, |
|
"grad_norm": 0.5178433060646057, |
|
"learning_rate": 3.1562478658213656e-06, |
|
"loss": 0.0499, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.7987613320168747, |
|
"grad_norm": 0.7967355847358704, |
|
"learning_rate": 3.0476030253037415e-06, |
|
"loss": 0.0502, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 1.8023516739969483, |
|
"grad_norm": 0.8158264756202698, |
|
"learning_rate": 2.9408021837945942e-06, |
|
"loss": 0.0481, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 1.8059420159770219, |
|
"grad_norm": 0.43987634778022766, |
|
"learning_rate": 2.8358495357687364e-06, |
|
"loss": 0.0456, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 1.8095323579570954, |
|
"grad_norm": 0.45231232047080994, |
|
"learning_rate": 2.7327492031153866e-06, |
|
"loss": 0.0474, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 1.813122699937169, |
|
"grad_norm": 0.799350917339325, |
|
"learning_rate": 2.631505234976311e-06, |
|
"loss": 0.0489, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 1.8167130419172426, |
|
"grad_norm": 0.5466026663780212, |
|
"learning_rate": 2.5321216075867626e-06, |
|
"loss": 0.0474, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 1.8203033838973162, |
|
"grad_norm": 0.7424982190132141, |
|
"learning_rate": 2.4346022241193643e-06, |
|
"loss": 0.0452, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 1.8238937258773897, |
|
"grad_norm": 0.7979154586791992, |
|
"learning_rate": 2.3389509145308076e-06, |
|
"loss": 0.05, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 1.8274840678574633, |
|
"grad_norm": 0.6414862275123596, |
|
"learning_rate": 2.245171435411414e-06, |
|
"loss": 0.0487, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 1.8310744098375369, |
|
"grad_norm": 0.5069670081138611, |
|
"learning_rate": 2.1532674698376e-06, |
|
"loss": 0.0464, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 1.8346647518176105, |
|
"grad_norm": 0.4745350480079651, |
|
"learning_rate": 2.0632426272272464e-06, |
|
"loss": 0.0467, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 1.8382550937976843, |
|
"grad_norm": 0.5952518582344055, |
|
"learning_rate": 1.975100443197958e-06, |
|
"loss": 0.0508, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 1.8418454357777578, |
|
"grad_norm": 0.5413398146629333, |
|
"learning_rate": 1.8888443794281618e-06, |
|
"loss": 0.0426, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 1.8454357777578314, |
|
"grad_norm": 0.6297146677970886, |
|
"learning_rate": 1.8044778235211723e-06, |
|
"loss": 0.0523, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 1.849026119737905, |
|
"grad_norm": 0.458870530128479, |
|
"learning_rate": 1.72200408887217e-06, |
|
"loss": 0.0462, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 1.8526164617179788, |
|
"grad_norm": 0.6490904688835144, |
|
"learning_rate": 1.6414264145380442e-06, |
|
"loss": 0.0484, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 1.8562068036980524, |
|
"grad_norm": 0.7383233904838562, |
|
"learning_rate": 1.562747965110195e-06, |
|
"loss": 0.0484, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 1.859797145678126, |
|
"grad_norm": 2.4921016693115234, |
|
"learning_rate": 1.4859718305902326e-06, |
|
"loss": 0.046, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 1.8633874876581995, |
|
"grad_norm": 1.6146339178085327, |
|
"learning_rate": 1.411101026268652e-06, |
|
"loss": 0.043, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 1.866977829638273, |
|
"grad_norm": 0.47561097145080566, |
|
"learning_rate": 1.3381384926063833e-06, |
|
"loss": 0.0467, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.8705681716183467, |
|
"grad_norm": 0.5113374590873718, |
|
"learning_rate": 1.2670870951193292e-06, |
|
"loss": 0.0475, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 1.8741585135984202, |
|
"grad_norm": 0.5401134490966797, |
|
"learning_rate": 1.197949624265776e-06, |
|
"loss": 0.0482, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 1.8777488555784938, |
|
"grad_norm": 0.4193181097507477, |
|
"learning_rate": 1.1307287953368995e-06, |
|
"loss": 0.0472, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 1.8813391975585674, |
|
"grad_norm": 0.45812806487083435, |
|
"learning_rate": 1.065427248350015e-06, |
|
"loss": 0.0477, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 1.884929539538641, |
|
"grad_norm": 0.8749078512191772, |
|
"learning_rate": 1.0020475479449731e-06, |
|
"loss": 0.0507, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 1.8885198815187145, |
|
"grad_norm": 0.48960697650909424, |
|
"learning_rate": 9.405921832833841e-07, |
|
"loss": 0.046, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 1.892110223498788, |
|
"grad_norm": 0.7578288316726685, |
|
"learning_rate": 8.810635679509071e-07, |
|
"loss": 0.0471, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 1.895700565478862, |
|
"grad_norm": 0.6842608451843262, |
|
"learning_rate": 8.23464039862426e-07, |
|
"loss": 0.0445, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 1.8992909074589355, |
|
"grad_norm": 0.5089036226272583, |
|
"learning_rate": 7.67795861170234e-07, |
|
"loss": 0.0457, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 1.902881249439009, |
|
"grad_norm": 0.5393949151039124, |
|
"learning_rate": 7.140612181752048e-07, |
|
"loss": 0.0456, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 1.9064715914190826, |
|
"grad_norm": 0.9976809024810791, |
|
"learning_rate": 6.622622212409058e-07, |
|
"loss": 0.047, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 1.9100619333991564, |
|
"grad_norm": 0.5556519031524658, |
|
"learning_rate": 6.124009047107471e-07, |
|
"loss": 0.0517, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 1.91365227537923, |
|
"grad_norm": 0.534712553024292, |
|
"learning_rate": 5.644792268280574e-07, |
|
"loss": 0.0427, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 1.9172426173593036, |
|
"grad_norm": 0.7053726315498352, |
|
"learning_rate": 5.18499069659184e-07, |
|
"loss": 0.0455, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 1.9208329593393771, |
|
"grad_norm": 0.5793641209602356, |
|
"learning_rate": 4.744622390195963e-07, |
|
"loss": 0.0513, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 1.9244233013194507, |
|
"grad_norm": 0.4043155908584595, |
|
"learning_rate": 4.323704644029203e-07, |
|
"loss": 0.0501, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 1.9280136432995243, |
|
"grad_norm": 0.4776788353919983, |
|
"learning_rate": 3.9222539891307086e-07, |
|
"loss": 0.0415, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 1.9316039852795979, |
|
"grad_norm": 0.6649408340454102, |
|
"learning_rate": 3.5402861919928697e-07, |
|
"loss": 0.0451, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 1.9351943272596714, |
|
"grad_norm": 3.3624627590179443, |
|
"learning_rate": 3.1778162539421453e-07, |
|
"loss": 0.0472, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 1.938784669239745, |
|
"grad_norm": 0.5529268980026245, |
|
"learning_rate": 2.8348584105501453e-07, |
|
"loss": 0.045, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.9423750112198186, |
|
"grad_norm": 0.6905925273895264, |
|
"learning_rate": 2.511426131074246e-07, |
|
"loss": 0.0452, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 1.9459653531998922, |
|
"grad_norm": 0.6144551038742065, |
|
"learning_rate": 2.2075321179289565e-07, |
|
"loss": 0.0422, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 1.9495556951799657, |
|
"grad_norm": 1.2887723445892334, |
|
"learning_rate": 1.9231883061866517e-07, |
|
"loss": 0.0441, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 1.9531460371600395, |
|
"grad_norm": 0.7968602776527405, |
|
"learning_rate": 1.6584058631090582e-07, |
|
"loss": 0.0455, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 1.9567363791401131, |
|
"grad_norm": 0.7239225506782532, |
|
"learning_rate": 1.4131951877087158e-07, |
|
"loss": 0.0461, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 1.9603267211201867, |
|
"grad_norm": 0.6258605718612671, |
|
"learning_rate": 1.1875659103404157e-07, |
|
"loss": 0.0449, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 1.9639170631002603, |
|
"grad_norm": 0.7048450708389282, |
|
"learning_rate": 9.815268923230592e-08, |
|
"loss": 0.0469, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 1.967507405080334, |
|
"grad_norm": 0.6698242425918579, |
|
"learning_rate": 7.95086225591657e-08, |
|
"loss": 0.0469, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 1.9710977470604076, |
|
"grad_norm": 0.612483561038971, |
|
"learning_rate": 6.282512323795287e-08, |
|
"loss": 0.0432, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 1.9746880890404812, |
|
"grad_norm": 1.0906122922897339, |
|
"learning_rate": 4.81028464930755e-08, |
|
"loss": 0.0439, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.9782784310205548, |
|
"grad_norm": 0.5854030847549438, |
|
"learning_rate": 3.534237052426059e-08, |
|
"loss": 0.0461, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 1.9818687730006284, |
|
"grad_norm": 0.5965482592582703, |
|
"learning_rate": 2.4544196483888837e-08, |
|
"loss": 0.0449, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 1.985459114980702, |
|
"grad_norm": 1.0227429866790771, |
|
"learning_rate": 1.5708748457271548e-08, |
|
"loss": 0.0476, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 1.9890494569607755, |
|
"grad_norm": 0.506277859210968, |
|
"learning_rate": 8.836373446019507e-09, |
|
"loss": 0.0477, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 1.992639798940849, |
|
"grad_norm": 0.4811525344848633, |
|
"learning_rate": 3.927341354420522e-09, |
|
"loss": 0.0468, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 1.9962301409209227, |
|
"grad_norm": 0.4584663212299347, |
|
"learning_rate": 9.818449787979412e-10, |
|
"loss": 0.0424, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 1.9998204829009962, |
|
"grad_norm": 0.6924448609352112, |
|
"learning_rate": 0.0, |
|
"loss": 0.0443, |
|
"step": 5570 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 5570, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 8.008182835124896e+18, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|