|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 19.97088108209656, |
|
"eval_steps": 500, |
|
"global_step": 13300, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.015029118917903438, |
|
"grad_norm": 0.2293534129858017, |
|
"learning_rate": 6.766917293233083e-07, |
|
"loss": 1.5634, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.030058237835806877, |
|
"grad_norm": 0.2535412907600403, |
|
"learning_rate": 1.4285714285714286e-06, |
|
"loss": 1.5043, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.04508735675371031, |
|
"grad_norm": 0.3165118992328644, |
|
"learning_rate": 2.1804511278195492e-06, |
|
"loss": 1.5571, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.06011647567161375, |
|
"grad_norm": 0.27761849761009216, |
|
"learning_rate": 2.9323308270676694e-06, |
|
"loss": 1.5064, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0751455945895172, |
|
"grad_norm": 0.34336039423942566, |
|
"learning_rate": 3.6842105263157892e-06, |
|
"loss": 1.5299, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.09017471350742062, |
|
"grad_norm": 0.4327663481235504, |
|
"learning_rate": 4.436090225563911e-06, |
|
"loss": 1.5351, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.10520383242532406, |
|
"grad_norm": 0.4244738221168518, |
|
"learning_rate": 5.187969924812031e-06, |
|
"loss": 1.4876, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.1202329513432275, |
|
"grad_norm": 0.39235126972198486, |
|
"learning_rate": 5.939849624060151e-06, |
|
"loss": 1.4138, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.13526207026113093, |
|
"grad_norm": 0.36149346828460693, |
|
"learning_rate": 6.691729323308271e-06, |
|
"loss": 1.3901, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.1502911891790344, |
|
"grad_norm": 0.2174796313047409, |
|
"learning_rate": 7.4436090225563915e-06, |
|
"loss": 1.284, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.16532030809693782, |
|
"grad_norm": 0.19376038014888763, |
|
"learning_rate": 8.195488721804512e-06, |
|
"loss": 1.2713, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.18034942701484125, |
|
"grad_norm": 0.18585975468158722, |
|
"learning_rate": 8.947368421052632e-06, |
|
"loss": 1.2301, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.1953785459327447, |
|
"grad_norm": 0.18462727963924408, |
|
"learning_rate": 9.699248120300752e-06, |
|
"loss": 1.231, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.21040766485064813, |
|
"grad_norm": 0.16348238289356232, |
|
"learning_rate": 1.0451127819548872e-05, |
|
"loss": 1.2167, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.22543678376855156, |
|
"grad_norm": 0.17574988305568695, |
|
"learning_rate": 1.1203007518796992e-05, |
|
"loss": 1.1999, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.240465902686455, |
|
"grad_norm": 0.14682741463184357, |
|
"learning_rate": 1.1954887218045113e-05, |
|
"loss": 1.2491, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.25549502160435844, |
|
"grad_norm": 0.1753804236650467, |
|
"learning_rate": 1.2706766917293233e-05, |
|
"loss": 1.2036, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.27052414052226187, |
|
"grad_norm": 0.17857442796230316, |
|
"learning_rate": 1.3458646616541353e-05, |
|
"loss": 1.1822, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.2855532594401653, |
|
"grad_norm": 0.18367990851402283, |
|
"learning_rate": 1.4210526315789475e-05, |
|
"loss": 1.1679, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.3005823783580688, |
|
"grad_norm": 0.20284640789031982, |
|
"learning_rate": 1.4962406015037595e-05, |
|
"loss": 1.1337, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.3156114972759722, |
|
"grad_norm": 0.16659210622310638, |
|
"learning_rate": 1.5714285714285715e-05, |
|
"loss": 1.181, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.33064061619387564, |
|
"grad_norm": 0.1798979490995407, |
|
"learning_rate": 1.6466165413533834e-05, |
|
"loss": 1.1785, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.34566973511177906, |
|
"grad_norm": 0.1689957082271576, |
|
"learning_rate": 1.7218045112781956e-05, |
|
"loss": 1.1489, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.3606988540296825, |
|
"grad_norm": 0.199369415640831, |
|
"learning_rate": 1.7969924812030074e-05, |
|
"loss": 1.1677, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.375727972947586, |
|
"grad_norm": 0.23965977132320404, |
|
"learning_rate": 1.8721804511278196e-05, |
|
"loss": 1.1516, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.3907570918654894, |
|
"grad_norm": 0.18958410620689392, |
|
"learning_rate": 1.9473684210526315e-05, |
|
"loss": 1.1278, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.40578621078339283, |
|
"grad_norm": 0.20659048855304718, |
|
"learning_rate": 2.0225563909774437e-05, |
|
"loss": 1.1613, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.42081532970129626, |
|
"grad_norm": 0.22374583780765533, |
|
"learning_rate": 2.097744360902256e-05, |
|
"loss": 1.1348, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.4358444486191997, |
|
"grad_norm": 0.22938427329063416, |
|
"learning_rate": 2.1729323308270677e-05, |
|
"loss": 1.157, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.4508735675371031, |
|
"grad_norm": 0.2688145935535431, |
|
"learning_rate": 2.24812030075188e-05, |
|
"loss": 1.1329, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.4659026864550066, |
|
"grad_norm": 0.21894283592700958, |
|
"learning_rate": 2.3233082706766917e-05, |
|
"loss": 1.1197, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.48093180537291, |
|
"grad_norm": 0.2249055653810501, |
|
"learning_rate": 2.398496240601504e-05, |
|
"loss": 1.127, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.49596092429081345, |
|
"grad_norm": 0.2487722635269165, |
|
"learning_rate": 2.4736842105263158e-05, |
|
"loss": 1.1331, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.5109900432087169, |
|
"grad_norm": 0.2143404483795166, |
|
"learning_rate": 2.548872180451128e-05, |
|
"loss": 1.1342, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.5260191621266204, |
|
"grad_norm": 0.27003639936447144, |
|
"learning_rate": 2.6240601503759398e-05, |
|
"loss": 1.133, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.5410482810445237, |
|
"grad_norm": 0.25785332918167114, |
|
"learning_rate": 2.699248120300752e-05, |
|
"loss": 1.1284, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.5560773999624272, |
|
"grad_norm": 0.24334581196308136, |
|
"learning_rate": 2.774436090225564e-05, |
|
"loss": 1.1149, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.5711065188803306, |
|
"grad_norm": 0.23162595927715302, |
|
"learning_rate": 2.849624060150376e-05, |
|
"loss": 1.144, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.5861356377982341, |
|
"grad_norm": 0.2650283873081207, |
|
"learning_rate": 2.924812030075188e-05, |
|
"loss": 1.1431, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.6011647567161376, |
|
"grad_norm": 0.2596570551395416, |
|
"learning_rate": 3e-05, |
|
"loss": 1.1211, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.6161938756340409, |
|
"grad_norm": 0.25908759236335754, |
|
"learning_rate": 3.075187969924812e-05, |
|
"loss": 1.1287, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.6312229945519444, |
|
"grad_norm": 0.24592378735542297, |
|
"learning_rate": 3.150375939849624e-05, |
|
"loss": 1.0913, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.6462521134698478, |
|
"grad_norm": 0.2371867448091507, |
|
"learning_rate": 3.225563909774436e-05, |
|
"loss": 1.1435, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.6612812323877513, |
|
"grad_norm": 0.25050684809684753, |
|
"learning_rate": 3.300751879699248e-05, |
|
"loss": 1.1009, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.6763103513056548, |
|
"grad_norm": 0.26998868584632874, |
|
"learning_rate": 3.3759398496240603e-05, |
|
"loss": 1.1018, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.6913394702235581, |
|
"grad_norm": 0.255825012922287, |
|
"learning_rate": 3.451127819548872e-05, |
|
"loss": 1.096, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.7063685891414616, |
|
"grad_norm": 0.2328484058380127, |
|
"learning_rate": 3.526315789473684e-05, |
|
"loss": 1.1083, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.721397708059365, |
|
"grad_norm": 0.2772115170955658, |
|
"learning_rate": 3.6015037593984966e-05, |
|
"loss": 1.1243, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.7364268269772685, |
|
"grad_norm": 0.2620559334754944, |
|
"learning_rate": 3.6766917293233084e-05, |
|
"loss": 1.1357, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.751455945895172, |
|
"grad_norm": 0.2600899934768677, |
|
"learning_rate": 3.75187969924812e-05, |
|
"loss": 1.1044, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.7664850648130753, |
|
"grad_norm": 0.23489312827587128, |
|
"learning_rate": 3.827067669172932e-05, |
|
"loss": 1.1028, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.7815141837309788, |
|
"grad_norm": 0.2843015491962433, |
|
"learning_rate": 3.9022556390977447e-05, |
|
"loss": 1.1234, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.7965433026488822, |
|
"grad_norm": 0.27744609117507935, |
|
"learning_rate": 3.9774436090225565e-05, |
|
"loss": 1.0939, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.8115724215667857, |
|
"grad_norm": 0.2344987690448761, |
|
"learning_rate": 4.0526315789473684e-05, |
|
"loss": 1.094, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.826601540484689, |
|
"grad_norm": 0.2847677171230316, |
|
"learning_rate": 4.12781954887218e-05, |
|
"loss": 1.1, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.8416306594025925, |
|
"grad_norm": 0.3026759922504425, |
|
"learning_rate": 4.203007518796993e-05, |
|
"loss": 1.1191, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.856659778320496, |
|
"grad_norm": 0.38774576783180237, |
|
"learning_rate": 4.2781954887218046e-05, |
|
"loss": 1.1273, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.8716888972383994, |
|
"grad_norm": 0.28009462356567383, |
|
"learning_rate": 4.3533834586466164e-05, |
|
"loss": 1.081, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.8867180161563029, |
|
"grad_norm": 0.2575189471244812, |
|
"learning_rate": 4.428571428571428e-05, |
|
"loss": 1.1077, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.9017471350742062, |
|
"grad_norm": 0.2847520112991333, |
|
"learning_rate": 4.503759398496241e-05, |
|
"loss": 1.1041, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.9167762539921097, |
|
"grad_norm": 0.31493791937828064, |
|
"learning_rate": 4.5789473684210527e-05, |
|
"loss": 1.1406, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.9318053729100132, |
|
"grad_norm": 0.2649036645889282, |
|
"learning_rate": 4.6541353383458645e-05, |
|
"loss": 1.0949, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.9468344918279166, |
|
"grad_norm": 0.29710251092910767, |
|
"learning_rate": 4.729323308270677e-05, |
|
"loss": 1.0853, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.96186361074582, |
|
"grad_norm": 0.26907584071159363, |
|
"learning_rate": 4.804511278195489e-05, |
|
"loss": 1.1092, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.9768927296637234, |
|
"grad_norm": 0.3357178568840027, |
|
"learning_rate": 4.879699248120301e-05, |
|
"loss": 1.1179, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.9919218485816269, |
|
"grad_norm": 0.2772427201271057, |
|
"learning_rate": 4.9548872180451126e-05, |
|
"loss": 1.0903, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.0060116475671614, |
|
"grad_norm": 0.26991239190101624, |
|
"learning_rate": 5.030075187969925e-05, |
|
"loss": 1.1113, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.021040766485065, |
|
"grad_norm": 0.24760043621063232, |
|
"learning_rate": 5.1052631578947376e-05, |
|
"loss": 1.068, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.0360698854029682, |
|
"grad_norm": 0.28557974100112915, |
|
"learning_rate": 5.180451127819549e-05, |
|
"loss": 1.0954, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.0510990043208717, |
|
"grad_norm": 0.3007003962993622, |
|
"learning_rate": 5.2556390977443613e-05, |
|
"loss": 1.0944, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.0661281232387751, |
|
"grad_norm": 0.30276528000831604, |
|
"learning_rate": 5.330827067669173e-05, |
|
"loss": 1.0945, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.0811572421566786, |
|
"grad_norm": 0.26913130283355713, |
|
"learning_rate": 5.406015037593986e-05, |
|
"loss": 1.112, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.0961863610745821, |
|
"grad_norm": 0.289982408285141, |
|
"learning_rate": 5.481203007518797e-05, |
|
"loss": 1.0891, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.1112154799924854, |
|
"grad_norm": 0.28320783376693726, |
|
"learning_rate": 5.5563909774436094e-05, |
|
"loss": 1.094, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.1262445989103889, |
|
"grad_norm": 0.31406116485595703, |
|
"learning_rate": 5.631578947368421e-05, |
|
"loss": 1.0853, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.1412737178282923, |
|
"grad_norm": 0.299730122089386, |
|
"learning_rate": 5.706766917293234e-05, |
|
"loss": 1.1048, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.1563028367461958, |
|
"grad_norm": 0.30774202942848206, |
|
"learning_rate": 5.781954887218045e-05, |
|
"loss": 1.0549, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.1713319556640993, |
|
"grad_norm": 0.325926810503006, |
|
"learning_rate": 5.8571428571428575e-05, |
|
"loss": 1.0823, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.1863610745820026, |
|
"grad_norm": 0.31851741671562195, |
|
"learning_rate": 5.9323308270676694e-05, |
|
"loss": 1.0989, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.201390193499906, |
|
"grad_norm": 0.3333583474159241, |
|
"learning_rate": 6.007518796992482e-05, |
|
"loss": 1.0625, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.2164193124178095, |
|
"grad_norm": 0.3349563479423523, |
|
"learning_rate": 6.082706766917293e-05, |
|
"loss": 1.1002, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.231448431335713, |
|
"grad_norm": 0.3039754629135132, |
|
"learning_rate": 6.157894736842106e-05, |
|
"loss": 1.0927, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.2464775502536165, |
|
"grad_norm": 0.3020300269126892, |
|
"learning_rate": 6.233082706766917e-05, |
|
"loss": 1.0983, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.2615066691715198, |
|
"grad_norm": 0.31834477186203003, |
|
"learning_rate": 6.308270676691729e-05, |
|
"loss": 1.0628, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.2765357880894233, |
|
"grad_norm": 0.3013087809085846, |
|
"learning_rate": 6.383458646616541e-05, |
|
"loss": 1.0683, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.2915649070073267, |
|
"grad_norm": 0.3001497983932495, |
|
"learning_rate": 6.458646616541354e-05, |
|
"loss": 1.0858, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.30659402592523, |
|
"grad_norm": 0.32003313302993774, |
|
"learning_rate": 6.533834586466165e-05, |
|
"loss": 1.0747, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.3216231448431337, |
|
"grad_norm": 0.3063625693321228, |
|
"learning_rate": 6.609022556390978e-05, |
|
"loss": 1.1008, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.336652263761037, |
|
"grad_norm": 0.27760475873947144, |
|
"learning_rate": 6.68421052631579e-05, |
|
"loss": 1.0903, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.3516813826789404, |
|
"grad_norm": 0.25132644176483154, |
|
"learning_rate": 6.759398496240602e-05, |
|
"loss": 1.0808, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.366710501596844, |
|
"grad_norm": 0.2900444567203522, |
|
"learning_rate": 6.834586466165414e-05, |
|
"loss": 1.0755, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.3817396205147472, |
|
"grad_norm": 0.2900155484676361, |
|
"learning_rate": 6.909774436090227e-05, |
|
"loss": 1.0797, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.396768739432651, |
|
"grad_norm": 0.31477174162864685, |
|
"learning_rate": 6.984962406015037e-05, |
|
"loss": 1.076, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.4117978583505542, |
|
"grad_norm": 0.3233202397823334, |
|
"learning_rate": 7.06015037593985e-05, |
|
"loss": 1.0968, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.4268269772684576, |
|
"grad_norm": 0.30731186270713806, |
|
"learning_rate": 7.135338345864661e-05, |
|
"loss": 1.0976, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.4418560961863611, |
|
"grad_norm": 0.24933114647865295, |
|
"learning_rate": 7.210526315789474e-05, |
|
"loss": 1.0713, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.4568852151042644, |
|
"grad_norm": 0.2990662753582001, |
|
"learning_rate": 7.285714285714286e-05, |
|
"loss": 1.0988, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.4719143340221679, |
|
"grad_norm": 0.25678712129592896, |
|
"learning_rate": 7.360902255639098e-05, |
|
"loss": 1.0874, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.4869434529400714, |
|
"grad_norm": 0.3273868262767792, |
|
"learning_rate": 7.43609022556391e-05, |
|
"loss": 1.1036, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.5019725718579748, |
|
"grad_norm": 0.26454275846481323, |
|
"learning_rate": 7.511278195488723e-05, |
|
"loss": 1.0713, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.5170016907758783, |
|
"grad_norm": 0.2492770105600357, |
|
"learning_rate": 7.586466165413533e-05, |
|
"loss": 1.063, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.5320308096937816, |
|
"grad_norm": 0.28998205065727234, |
|
"learning_rate": 7.661654135338347e-05, |
|
"loss": 1.0866, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.5470599286116853, |
|
"grad_norm": 0.26011377573013306, |
|
"learning_rate": 7.736842105263159e-05, |
|
"loss": 1.0615, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.5620890475295885, |
|
"grad_norm": 0.25039157271385193, |
|
"learning_rate": 7.81203007518797e-05, |
|
"loss": 1.0613, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.577118166447492, |
|
"grad_norm": 0.26238375902175903, |
|
"learning_rate": 7.887218045112782e-05, |
|
"loss": 1.0927, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.5921472853653955, |
|
"grad_norm": 0.23926205933094025, |
|
"learning_rate": 7.962406015037594e-05, |
|
"loss": 1.0568, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.6071764042832988, |
|
"grad_norm": 0.24725791811943054, |
|
"learning_rate": 8.037593984962406e-05, |
|
"loss": 1.0772, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.6222055232012025, |
|
"grad_norm": 0.25732311606407166, |
|
"learning_rate": 8.112781954887219e-05, |
|
"loss": 1.1058, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.6372346421191057, |
|
"grad_norm": 0.2595824897289276, |
|
"learning_rate": 8.18796992481203e-05, |
|
"loss": 1.1056, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.6522637610370092, |
|
"grad_norm": 0.25049930810928345, |
|
"learning_rate": 8.263157894736843e-05, |
|
"loss": 1.0818, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.6672928799549127, |
|
"grad_norm": 0.2525707185268402, |
|
"learning_rate": 8.338345864661655e-05, |
|
"loss": 1.1147, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.682321998872816, |
|
"grad_norm": 0.25421109795570374, |
|
"learning_rate": 8.413533834586467e-05, |
|
"loss": 1.0959, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.6973511177907197, |
|
"grad_norm": 0.2396637499332428, |
|
"learning_rate": 8.488721804511278e-05, |
|
"loss": 1.1012, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.712380236708623, |
|
"grad_norm": 0.24933594465255737, |
|
"learning_rate": 8.56390977443609e-05, |
|
"loss": 1.0931, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.7274093556265264, |
|
"grad_norm": 0.2631904184818268, |
|
"learning_rate": 8.639097744360902e-05, |
|
"loss": 1.1116, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.74243847454443, |
|
"grad_norm": 0.25884145498275757, |
|
"learning_rate": 8.714285714285715e-05, |
|
"loss": 1.0957, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.7574675934623332, |
|
"grad_norm": 0.23709504306316376, |
|
"learning_rate": 8.789473684210526e-05, |
|
"loss": 1.0804, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.7724967123802369, |
|
"grad_norm": 0.25201550126075745, |
|
"learning_rate": 8.864661654135339e-05, |
|
"loss": 1.0887, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.7875258312981401, |
|
"grad_norm": 0.2535940110683441, |
|
"learning_rate": 8.939849624060151e-05, |
|
"loss": 1.0748, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.8025549502160436, |
|
"grad_norm": 0.2509770691394806, |
|
"learning_rate": 9.015037593984963e-05, |
|
"loss": 1.1021, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.817584069133947, |
|
"grad_norm": 0.23271974921226501, |
|
"learning_rate": 9.090225563909775e-05, |
|
"loss": 1.0516, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.8326131880518504, |
|
"grad_norm": 0.249566912651062, |
|
"learning_rate": 9.165413533834586e-05, |
|
"loss": 1.0766, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.8476423069697538, |
|
"grad_norm": 0.22922058403491974, |
|
"learning_rate": 9.240601503759398e-05, |
|
"loss": 1.1056, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 1.8626714258876573, |
|
"grad_norm": 0.24767987430095673, |
|
"learning_rate": 9.315789473684211e-05, |
|
"loss": 1.0934, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.8777005448055608, |
|
"grad_norm": 0.23084762692451477, |
|
"learning_rate": 9.390977443609022e-05, |
|
"loss": 1.0894, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.8927296637234643, |
|
"grad_norm": 0.24973560869693756, |
|
"learning_rate": 9.466165413533835e-05, |
|
"loss": 1.0788, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.9077587826413676, |
|
"grad_norm": 0.248574361205101, |
|
"learning_rate": 9.541353383458647e-05, |
|
"loss": 1.0829, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.922787901559271, |
|
"grad_norm": 0.24072329699993134, |
|
"learning_rate": 9.616541353383459e-05, |
|
"loss": 1.1161, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 1.9378170204771745, |
|
"grad_norm": 0.2310166209936142, |
|
"learning_rate": 9.69172932330827e-05, |
|
"loss": 1.0682, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 1.952846139395078, |
|
"grad_norm": 0.23928825557231903, |
|
"learning_rate": 9.766917293233084e-05, |
|
"loss": 1.1194, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.9678752583129815, |
|
"grad_norm": 0.2643069624900818, |
|
"learning_rate": 9.842105263157894e-05, |
|
"loss": 1.0712, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 1.9829043772308848, |
|
"grad_norm": 0.2541036307811737, |
|
"learning_rate": 9.917293233082708e-05, |
|
"loss": 1.0847, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.9979334961487882, |
|
"grad_norm": 0.2341761291027069, |
|
"learning_rate": 9.99248120300752e-05, |
|
"loss": 1.0847, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 2.012023295134323, |
|
"grad_norm": 0.2271430492401123, |
|
"learning_rate": 9.999986051218537e-05, |
|
"loss": 1.0459, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 2.027052414052226, |
|
"grad_norm": 0.2847868800163269, |
|
"learning_rate": 9.999937833308459e-05, |
|
"loss": 1.0499, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 2.04208153297013, |
|
"grad_norm": 0.283787339925766, |
|
"learning_rate": 9.999855174394648e-05, |
|
"loss": 1.0434, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 2.057110651888033, |
|
"grad_norm": 0.3147590756416321, |
|
"learning_rate": 9.999738075046483e-05, |
|
"loss": 1.053, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 2.0721397708059364, |
|
"grad_norm": 0.26797565817832947, |
|
"learning_rate": 9.999586536070575e-05, |
|
"loss": 1.0599, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 2.08716888972384, |
|
"grad_norm": 0.3145821988582611, |
|
"learning_rate": 9.99940055851077e-05, |
|
"loss": 1.053, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 2.1021980086417433, |
|
"grad_norm": 0.2934500277042389, |
|
"learning_rate": 9.999180143648135e-05, |
|
"loss": 1.0613, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.117227127559647, |
|
"grad_norm": 0.26865336298942566, |
|
"learning_rate": 9.998925293000949e-05, |
|
"loss": 1.0548, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 2.1322562464775503, |
|
"grad_norm": 0.3006330132484436, |
|
"learning_rate": 9.998636008324698e-05, |
|
"loss": 1.0362, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 2.1472853653954536, |
|
"grad_norm": 0.3416139483451843, |
|
"learning_rate": 9.998312291612057e-05, |
|
"loss": 1.0588, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 2.1623144843133573, |
|
"grad_norm": 0.3035484552383423, |
|
"learning_rate": 9.997954145092878e-05, |
|
"loss": 1.0675, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 2.1773436032312605, |
|
"grad_norm": 0.2740626335144043, |
|
"learning_rate": 9.997561571234179e-05, |
|
"loss": 1.0435, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 2.1923727221491642, |
|
"grad_norm": 0.2556332051753998, |
|
"learning_rate": 9.997134572740121e-05, |
|
"loss": 1.0803, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 2.2074018410670675, |
|
"grad_norm": 0.30163928866386414, |
|
"learning_rate": 9.996673152551991e-05, |
|
"loss": 1.0734, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 2.2224309599849708, |
|
"grad_norm": 0.3375592529773712, |
|
"learning_rate": 9.996177313848184e-05, |
|
"loss": 1.0906, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 2.2374600789028745, |
|
"grad_norm": 0.2721370756626129, |
|
"learning_rate": 9.995647060044177e-05, |
|
"loss": 1.0335, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 2.2524891978207777, |
|
"grad_norm": 0.26590871810913086, |
|
"learning_rate": 9.995082394792514e-05, |
|
"loss": 1.0448, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.2675183167386814, |
|
"grad_norm": 0.31041955947875977, |
|
"learning_rate": 9.994483321982768e-05, |
|
"loss": 1.0715, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 2.2825474356565847, |
|
"grad_norm": 0.2897711396217346, |
|
"learning_rate": 9.993849845741524e-05, |
|
"loss": 1.0564, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 2.297576554574488, |
|
"grad_norm": 0.3064815402030945, |
|
"learning_rate": 9.993181970432349e-05, |
|
"loss": 1.0634, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 2.3126056734923917, |
|
"grad_norm": 0.28484266996383667, |
|
"learning_rate": 9.99247970065576e-05, |
|
"loss": 1.0742, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 2.327634792410295, |
|
"grad_norm": 0.2922673523426056, |
|
"learning_rate": 9.99174304124919e-05, |
|
"loss": 1.0851, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 2.3426639113281986, |
|
"grad_norm": 0.3106658160686493, |
|
"learning_rate": 9.990971997286961e-05, |
|
"loss": 1.1097, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 2.357693030246102, |
|
"grad_norm": 0.30149292945861816, |
|
"learning_rate": 9.990166574080246e-05, |
|
"loss": 1.048, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 2.372722149164005, |
|
"grad_norm": 0.2597978115081787, |
|
"learning_rate": 9.989326777177028e-05, |
|
"loss": 1.029, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 2.387751268081909, |
|
"grad_norm": 0.24886192381381989, |
|
"learning_rate": 9.988452612362071e-05, |
|
"loss": 1.054, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 2.402780386999812, |
|
"grad_norm": 0.3196369707584381, |
|
"learning_rate": 9.987544085656873e-05, |
|
"loss": 1.0715, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.417809505917716, |
|
"grad_norm": 0.28219732642173767, |
|
"learning_rate": 9.986601203319623e-05, |
|
"loss": 1.0631, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 2.432838624835619, |
|
"grad_norm": 0.2625892162322998, |
|
"learning_rate": 9.985623971845169e-05, |
|
"loss": 1.0699, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 2.4478677437535223, |
|
"grad_norm": 0.26191845536231995, |
|
"learning_rate": 9.984612397964956e-05, |
|
"loss": 1.0536, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 2.462896862671426, |
|
"grad_norm": 0.27230942249298096, |
|
"learning_rate": 9.983566488646999e-05, |
|
"loss": 1.0924, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 2.4779259815893293, |
|
"grad_norm": 0.2692161500453949, |
|
"learning_rate": 9.982486251095817e-05, |
|
"loss": 1.0414, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 2.492955100507233, |
|
"grad_norm": 0.2909376323223114, |
|
"learning_rate": 9.981371692752401e-05, |
|
"loss": 1.0797, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 2.5079842194251363, |
|
"grad_norm": 0.3020433783531189, |
|
"learning_rate": 9.980222821294143e-05, |
|
"loss": 1.0637, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 2.5230133383430395, |
|
"grad_norm": 0.2783840596675873, |
|
"learning_rate": 9.979039644634802e-05, |
|
"loss": 1.0617, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 2.5380424572609432, |
|
"grad_norm": 0.27026644349098206, |
|
"learning_rate": 9.977822170924434e-05, |
|
"loss": 1.0515, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 2.5530715761788465, |
|
"grad_norm": 0.2597585618495941, |
|
"learning_rate": 9.97657040854935e-05, |
|
"loss": 1.0541, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.56810069509675, |
|
"grad_norm": 0.2972753345966339, |
|
"learning_rate": 9.975284366132047e-05, |
|
"loss": 1.0541, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 2.5831298140146535, |
|
"grad_norm": 0.25682052969932556, |
|
"learning_rate": 9.973964052531154e-05, |
|
"loss": 1.0533, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 2.5981589329325567, |
|
"grad_norm": 0.2819693684577942, |
|
"learning_rate": 9.972609476841367e-05, |
|
"loss": 1.0458, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 2.61318805185046, |
|
"grad_norm": 0.28979477286338806, |
|
"learning_rate": 9.971220648393394e-05, |
|
"loss": 1.0747, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 2.6282171707683637, |
|
"grad_norm": 0.2849046289920807, |
|
"learning_rate": 9.96979757675388e-05, |
|
"loss": 1.05, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 2.6432462896862674, |
|
"grad_norm": 0.28079524636268616, |
|
"learning_rate": 9.968340271725352e-05, |
|
"loss": 1.0755, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 2.6582754086041707, |
|
"grad_norm": 0.27980852127075195, |
|
"learning_rate": 9.966848743346144e-05, |
|
"loss": 1.0874, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 2.673304527522074, |
|
"grad_norm": 0.25519728660583496, |
|
"learning_rate": 9.965323001890331e-05, |
|
"loss": 1.0319, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 2.688333646439977, |
|
"grad_norm": 0.25402480363845825, |
|
"learning_rate": 9.963763057867656e-05, |
|
"loss": 1.0268, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 2.703362765357881, |
|
"grad_norm": 0.25798556208610535, |
|
"learning_rate": 9.962168922023462e-05, |
|
"loss": 1.0365, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.7183918842757846, |
|
"grad_norm": 0.2535860538482666, |
|
"learning_rate": 9.960540605338613e-05, |
|
"loss": 1.0543, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 2.733421003193688, |
|
"grad_norm": 0.26214438676834106, |
|
"learning_rate": 9.958878119029418e-05, |
|
"loss": 1.0336, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 2.748450122111591, |
|
"grad_norm": 0.27087315917015076, |
|
"learning_rate": 9.957181474547563e-05, |
|
"loss": 1.0457, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 2.7634792410294944, |
|
"grad_norm": 0.27433788776397705, |
|
"learning_rate": 9.955450683580018e-05, |
|
"loss": 1.07, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 2.778508359947398, |
|
"grad_norm": 0.2705138027667999, |
|
"learning_rate": 9.953685758048967e-05, |
|
"loss": 1.0403, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 2.793537478865302, |
|
"grad_norm": 0.2626933157444, |
|
"learning_rate": 9.951886710111723e-05, |
|
"loss": 1.0464, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 2.808566597783205, |
|
"grad_norm": 0.27033478021621704, |
|
"learning_rate": 9.950053552160644e-05, |
|
"loss": 1.0653, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 2.8235957167011083, |
|
"grad_norm": 0.2985825836658478, |
|
"learning_rate": 9.948186296823048e-05, |
|
"loss": 1.0417, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 2.8386248356190116, |
|
"grad_norm": 0.2883852422237396, |
|
"learning_rate": 9.94628495696112e-05, |
|
"loss": 1.0503, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 2.8536539545369153, |
|
"grad_norm": 0.25887343287467957, |
|
"learning_rate": 9.94434954567184e-05, |
|
"loss": 1.0526, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.868683073454819, |
|
"grad_norm": 0.26801565289497375, |
|
"learning_rate": 9.94238007628687e-05, |
|
"loss": 1.0917, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 2.8837121923727222, |
|
"grad_norm": 0.2502713203430176, |
|
"learning_rate": 9.940376562372482e-05, |
|
"loss": 1.0638, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 2.8987413112906255, |
|
"grad_norm": 0.2549043297767639, |
|
"learning_rate": 9.93833901772945e-05, |
|
"loss": 1.0438, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 2.9137704302085288, |
|
"grad_norm": 0.26013997197151184, |
|
"learning_rate": 9.936267456392971e-05, |
|
"loss": 1.0759, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 2.9287995491264325, |
|
"grad_norm": 0.29080161452293396, |
|
"learning_rate": 9.934161892632547e-05, |
|
"loss": 1.0387, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 2.9438286680443357, |
|
"grad_norm": 0.27860552072525024, |
|
"learning_rate": 9.932022340951909e-05, |
|
"loss": 1.0339, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 2.9588577869622394, |
|
"grad_norm": 0.25391969084739685, |
|
"learning_rate": 9.929848816088897e-05, |
|
"loss": 1.0503, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 2.9738869058801427, |
|
"grad_norm": 0.2683584690093994, |
|
"learning_rate": 9.927641333015377e-05, |
|
"loss": 1.0617, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 2.988916024798046, |
|
"grad_norm": 0.29328426718711853, |
|
"learning_rate": 9.925399906937123e-05, |
|
"loss": 1.068, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 3.003005823783581, |
|
"grad_norm": 0.26925235986709595, |
|
"learning_rate": 9.923124553293718e-05, |
|
"loss": 1.0641, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.018034942701484, |
|
"grad_norm": 0.2933187186717987, |
|
"learning_rate": 9.920815287758451e-05, |
|
"loss": 1.0264, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 3.0330640616193874, |
|
"grad_norm": 0.30965468287467957, |
|
"learning_rate": 9.918472126238206e-05, |
|
"loss": 1.0154, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 3.048093180537291, |
|
"grad_norm": 0.3275061547756195, |
|
"learning_rate": 9.916095084873347e-05, |
|
"loss": 0.9905, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 3.0631222994551943, |
|
"grad_norm": 0.40177953243255615, |
|
"learning_rate": 9.913684180037619e-05, |
|
"loss": 1.0066, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 3.078151418373098, |
|
"grad_norm": 0.389649361371994, |
|
"learning_rate": 9.911239428338023e-05, |
|
"loss": 1.0424, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 3.0931805372910013, |
|
"grad_norm": 0.3205302953720093, |
|
"learning_rate": 9.908760846614709e-05, |
|
"loss": 1.0234, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 3.1082096562089045, |
|
"grad_norm": 0.3212546408176422, |
|
"learning_rate": 9.906248451940861e-05, |
|
"loss": 1.0075, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 3.1232387751268083, |
|
"grad_norm": 0.33269983530044556, |
|
"learning_rate": 9.903702261622567e-05, |
|
"loss": 1.0039, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 3.1382678940447115, |
|
"grad_norm": 0.34872928261756897, |
|
"learning_rate": 9.901122293198719e-05, |
|
"loss": 0.9952, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 3.153297012962615, |
|
"grad_norm": 0.348037987947464, |
|
"learning_rate": 9.898508564440879e-05, |
|
"loss": 1.0133, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 3.1683261318805185, |
|
"grad_norm": 0.3966461420059204, |
|
"learning_rate": 9.895861093353158e-05, |
|
"loss": 1.0049, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 3.1833552507984217, |
|
"grad_norm": 0.3553076684474945, |
|
"learning_rate": 9.893179898172095e-05, |
|
"loss": 0.9789, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 3.1983843697163254, |
|
"grad_norm": 0.38464319705963135, |
|
"learning_rate": 9.890464997366529e-05, |
|
"loss": 1.0062, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 3.2134134886342287, |
|
"grad_norm": 0.3749645948410034, |
|
"learning_rate": 9.887716409637478e-05, |
|
"loss": 1.0364, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 3.2284426075521324, |
|
"grad_norm": 0.3553982675075531, |
|
"learning_rate": 9.884934153917997e-05, |
|
"loss": 0.9896, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 3.2434717264700357, |
|
"grad_norm": 0.34840455651283264, |
|
"learning_rate": 9.882118249373063e-05, |
|
"loss": 0.9954, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 3.258500845387939, |
|
"grad_norm": 0.34040772914886475, |
|
"learning_rate": 9.879268715399432e-05, |
|
"loss": 1.0224, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 3.2735299643058426, |
|
"grad_norm": 0.37151041626930237, |
|
"learning_rate": 9.87638557162551e-05, |
|
"loss": 0.9864, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 3.288559083223746, |
|
"grad_norm": 0.34764307737350464, |
|
"learning_rate": 9.87346883791122e-05, |
|
"loss": 1.0121, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 3.3035882021416496, |
|
"grad_norm": 0.3537833094596863, |
|
"learning_rate": 9.870518534347853e-05, |
|
"loss": 0.9952, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 3.318617321059553, |
|
"grad_norm": 0.3364524245262146, |
|
"learning_rate": 9.867534681257951e-05, |
|
"loss": 1.0383, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 3.333646439977456, |
|
"grad_norm": 0.33494752645492554, |
|
"learning_rate": 9.864517299195144e-05, |
|
"loss": 1.0318, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 3.34867555889536, |
|
"grad_norm": 0.31135261058807373, |
|
"learning_rate": 9.861466408944027e-05, |
|
"loss": 0.9749, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 3.363704677813263, |
|
"grad_norm": 0.36317843198776245, |
|
"learning_rate": 9.858382031520005e-05, |
|
"loss": 1.0232, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 3.378733796731167, |
|
"grad_norm": 0.346181720495224, |
|
"learning_rate": 9.855264188169152e-05, |
|
"loss": 1.0099, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 3.39376291564907, |
|
"grad_norm": 0.35162779688835144, |
|
"learning_rate": 9.852112900368066e-05, |
|
"loss": 1.0128, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 3.4087920345669733, |
|
"grad_norm": 0.3490872383117676, |
|
"learning_rate": 9.848928189823723e-05, |
|
"loss": 1.0, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 3.423821153484877, |
|
"grad_norm": 0.3363298177719116, |
|
"learning_rate": 9.845710078473316e-05, |
|
"loss": 1.0171, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 3.4388502724027803, |
|
"grad_norm": 0.323453813791275, |
|
"learning_rate": 9.842458588484123e-05, |
|
"loss": 0.9908, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 3.453879391320684, |
|
"grad_norm": 0.3421192765235901, |
|
"learning_rate": 9.839173742253334e-05, |
|
"loss": 1.0134, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 3.4689085102385873, |
|
"grad_norm": 0.33773696422576904, |
|
"learning_rate": 9.835855562407912e-05, |
|
"loss": 0.9938, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 3.4839376291564905, |
|
"grad_norm": 0.34854745864868164, |
|
"learning_rate": 9.83250407180443e-05, |
|
"loss": 0.9922, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 3.4989667480743942, |
|
"grad_norm": 0.35300213098526, |
|
"learning_rate": 9.829119293528916e-05, |
|
"loss": 1.0067, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 3.5139958669922975, |
|
"grad_norm": 0.34796491265296936, |
|
"learning_rate": 9.82570125089669e-05, |
|
"loss": 1.0133, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 3.529024985910201, |
|
"grad_norm": 0.35767292976379395, |
|
"learning_rate": 9.822249967452213e-05, |
|
"loss": 1.0187, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 3.5440541048281045, |
|
"grad_norm": 0.3610760569572449, |
|
"learning_rate": 9.818765466968909e-05, |
|
"loss": 1.0044, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 3.5590832237460077, |
|
"grad_norm": 0.3299923241138458, |
|
"learning_rate": 9.815247773449018e-05, |
|
"loss": 0.9999, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 3.5741123426639114, |
|
"grad_norm": 0.27984675765037537, |
|
"learning_rate": 9.81169691112342e-05, |
|
"loss": 0.9758, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 3.5891414615818147, |
|
"grad_norm": 0.30341655015945435, |
|
"learning_rate": 9.80811290445147e-05, |
|
"loss": 1.0024, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 3.6041705804997184, |
|
"grad_norm": 0.33460941910743713, |
|
"learning_rate": 9.804495778120833e-05, |
|
"loss": 1.0167, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 3.6191996994176217, |
|
"grad_norm": 0.33041292428970337, |
|
"learning_rate": 9.800845557047314e-05, |
|
"loss": 1.0108, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 3.634228818335525, |
|
"grad_norm": 0.304404079914093, |
|
"learning_rate": 9.797162266374676e-05, |
|
"loss": 1.0052, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 3.6492579372534286, |
|
"grad_norm": 0.3226507008075714, |
|
"learning_rate": 9.793445931474485e-05, |
|
"loss": 1.0087, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 3.664287056171332, |
|
"grad_norm": 0.3016469180583954, |
|
"learning_rate": 9.789696577945917e-05, |
|
"loss": 1.0068, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 3.6793161750892356, |
|
"grad_norm": 0.317958265542984, |
|
"learning_rate": 9.785914231615594e-05, |
|
"loss": 1.0256, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 3.694345294007139, |
|
"grad_norm": 0.3319275677204132, |
|
"learning_rate": 9.782098918537399e-05, |
|
"loss": 0.9882, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 3.709374412925042, |
|
"grad_norm": 0.34686529636383057, |
|
"learning_rate": 9.778250664992304e-05, |
|
"loss": 1.0071, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 3.724403531842946, |
|
"grad_norm": 0.36334285140037537, |
|
"learning_rate": 9.77436949748818e-05, |
|
"loss": 1.0086, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 3.739432650760849, |
|
"grad_norm": 0.36445969343185425, |
|
"learning_rate": 9.770455442759621e-05, |
|
"loss": 1.0285, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 3.754461769678753, |
|
"grad_norm": 0.32181107997894287, |
|
"learning_rate": 9.766508527767757e-05, |
|
"loss": 1.0374, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.769490888596656, |
|
"grad_norm": 0.371354341506958, |
|
"learning_rate": 9.762528779700067e-05, |
|
"loss": 1.0192, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 3.7845200075145593, |
|
"grad_norm": 0.3308964669704437, |
|
"learning_rate": 9.758516225970198e-05, |
|
"loss": 1.0117, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 3.799549126432463, |
|
"grad_norm": 0.35072851181030273, |
|
"learning_rate": 9.754470894217767e-05, |
|
"loss": 1.02, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 3.8145782453503663, |
|
"grad_norm": 0.3249657452106476, |
|
"learning_rate": 9.750392812308178e-05, |
|
"loss": 1.0205, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 3.82960736426827, |
|
"grad_norm": 0.3178282380104065, |
|
"learning_rate": 9.74628200833243e-05, |
|
"loss": 1.0244, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 3.8446364831861732, |
|
"grad_norm": 0.3914138674736023, |
|
"learning_rate": 9.742138510606915e-05, |
|
"loss": 1.0201, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 3.8596656021040765, |
|
"grad_norm": 0.3437259793281555, |
|
"learning_rate": 9.737962347673231e-05, |
|
"loss": 1.0067, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 3.87469472102198, |
|
"grad_norm": 0.3310168385505676, |
|
"learning_rate": 9.733753548297988e-05, |
|
"loss": 1.0215, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 3.8897238399398835, |
|
"grad_norm": 0.35641738772392273, |
|
"learning_rate": 9.729512141472599e-05, |
|
"loss": 1.0181, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 3.904752958857787, |
|
"grad_norm": 0.36426904797554016, |
|
"learning_rate": 9.725238156413089e-05, |
|
"loss": 1.0174, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 3.9197820777756904, |
|
"grad_norm": 0.3366813659667969, |
|
"learning_rate": 9.720931622559893e-05, |
|
"loss": 1.0126, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 3.9348111966935937, |
|
"grad_norm": 0.3486657440662384, |
|
"learning_rate": 9.716592569577646e-05, |
|
"loss": 1.0161, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 3.9498403156114974, |
|
"grad_norm": 0.3317498564720154, |
|
"learning_rate": 9.712221027354991e-05, |
|
"loss": 1.0171, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 3.9648694345294007, |
|
"grad_norm": 0.3477359712123871, |
|
"learning_rate": 9.707817026004362e-05, |
|
"loss": 1.0195, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 3.9798985534473044, |
|
"grad_norm": 0.30774736404418945, |
|
"learning_rate": 9.70338059586178e-05, |
|
"loss": 1.0261, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 3.9949276723652076, |
|
"grad_norm": 0.38554686307907104, |
|
"learning_rate": 9.698911767486649e-05, |
|
"loss": 1.0376, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 4.0090174713507425, |
|
"grad_norm": 0.40208327770233154, |
|
"learning_rate": 9.694410571661537e-05, |
|
"loss": 0.9654, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 4.024046590268646, |
|
"grad_norm": 0.4230579733848572, |
|
"learning_rate": 9.689877039391968e-05, |
|
"loss": 0.9452, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 4.039075709186549, |
|
"grad_norm": 0.4582759439945221, |
|
"learning_rate": 9.685311201906215e-05, |
|
"loss": 0.9308, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 4.054104828104452, |
|
"grad_norm": 0.4000380337238312, |
|
"learning_rate": 9.680713090655072e-05, |
|
"loss": 0.9203, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 4.0691339470223555, |
|
"grad_norm": 0.3987461030483246, |
|
"learning_rate": 9.676082737311645e-05, |
|
"loss": 0.9427, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 4.08416306594026, |
|
"grad_norm": 0.4363115429878235, |
|
"learning_rate": 9.671420173771136e-05, |
|
"loss": 0.9249, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 4.099192184858163, |
|
"grad_norm": 0.39811596274375916, |
|
"learning_rate": 9.666725432150616e-05, |
|
"loss": 0.9205, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 4.114221303776066, |
|
"grad_norm": 0.4178659915924072, |
|
"learning_rate": 9.661998544788813e-05, |
|
"loss": 0.927, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 4.1292504226939695, |
|
"grad_norm": 0.43525931239128113, |
|
"learning_rate": 9.657239544245876e-05, |
|
"loss": 0.9172, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 4.144279541611873, |
|
"grad_norm": 0.38502469658851624, |
|
"learning_rate": 9.652448463303168e-05, |
|
"loss": 0.9331, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 4.159308660529776, |
|
"grad_norm": 0.50247722864151, |
|
"learning_rate": 9.647625334963024e-05, |
|
"loss": 0.9558, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 4.17433777944768, |
|
"grad_norm": 0.4176265597343445, |
|
"learning_rate": 9.642770192448536e-05, |
|
"loss": 0.9374, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 4.189366898365583, |
|
"grad_norm": 0.4144188463687897, |
|
"learning_rate": 9.637883069203314e-05, |
|
"loss": 0.9119, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 4.204396017283487, |
|
"grad_norm": 0.4362613558769226, |
|
"learning_rate": 9.632963998891262e-05, |
|
"loss": 0.928, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 4.21942513620139, |
|
"grad_norm": 0.45967820286750793, |
|
"learning_rate": 9.628013015396346e-05, |
|
"loss": 0.9398, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 4.234454255119294, |
|
"grad_norm": 0.4533185660839081, |
|
"learning_rate": 9.62303015282236e-05, |
|
"loss": 0.9586, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 4.249483374037197, |
|
"grad_norm": 0.438513845205307, |
|
"learning_rate": 9.618015445492688e-05, |
|
"loss": 0.9469, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 4.264512492955101, |
|
"grad_norm": 0.45950812101364136, |
|
"learning_rate": 9.612968927950065e-05, |
|
"loss": 0.9438, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 4.279541611873004, |
|
"grad_norm": 0.42663341760635376, |
|
"learning_rate": 9.607890634956355e-05, |
|
"loss": 0.9461, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 4.294570730790907, |
|
"grad_norm": 0.4346635043621063, |
|
"learning_rate": 9.602780601492294e-05, |
|
"loss": 0.9323, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 4.30959984970881, |
|
"grad_norm": 0.4921177327632904, |
|
"learning_rate": 9.597638862757255e-05, |
|
"loss": 0.9337, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 4.3246289686267145, |
|
"grad_norm": 0.39174574613571167, |
|
"learning_rate": 9.592465454169004e-05, |
|
"loss": 0.938, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 4.339658087544618, |
|
"grad_norm": 0.40984979271888733, |
|
"learning_rate": 9.587260411363465e-05, |
|
"loss": 0.9461, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 4.354687206462521, |
|
"grad_norm": 0.37494781613349915, |
|
"learning_rate": 9.582023770194461e-05, |
|
"loss": 0.9407, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 4.369716325380424, |
|
"grad_norm": 0.35851216316223145, |
|
"learning_rate": 9.57675556673348e-05, |
|
"loss": 0.9285, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 4.3847454442983285, |
|
"grad_norm": 0.37766364216804504, |
|
"learning_rate": 9.571455837269411e-05, |
|
"loss": 0.9268, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 4.399774563216232, |
|
"grad_norm": 0.45168834924697876, |
|
"learning_rate": 9.566124618308312e-05, |
|
"loss": 0.9593, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 4.414803682134135, |
|
"grad_norm": 0.43097320199012756, |
|
"learning_rate": 9.560761946573143e-05, |
|
"loss": 0.9537, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 4.429832801052038, |
|
"grad_norm": 0.415606826543808, |
|
"learning_rate": 9.555367859003525e-05, |
|
"loss": 0.929, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 4.4448619199699415, |
|
"grad_norm": 0.3891099989414215, |
|
"learning_rate": 9.54994239275548e-05, |
|
"loss": 0.9103, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 4.459891038887845, |
|
"grad_norm": 0.3769884705543518, |
|
"learning_rate": 9.544485585201169e-05, |
|
"loss": 0.9234, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 4.474920157805749, |
|
"grad_norm": 0.46022331714630127, |
|
"learning_rate": 9.538997473928647e-05, |
|
"loss": 0.9734, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 4.489949276723652, |
|
"grad_norm": 0.36743420362472534, |
|
"learning_rate": 9.533478096741597e-05, |
|
"loss": 0.9025, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 4.5049783956415554, |
|
"grad_norm": 0.4562210738658905, |
|
"learning_rate": 9.527927491659068e-05, |
|
"loss": 0.9444, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.520007514559459, |
|
"grad_norm": 0.4317024052143097, |
|
"learning_rate": 9.522345696915218e-05, |
|
"loss": 0.9301, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 4.535036633477363, |
|
"grad_norm": 0.43993476033210754, |
|
"learning_rate": 9.51673275095905e-05, |
|
"loss": 0.9425, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 4.550065752395266, |
|
"grad_norm": 0.34426409006118774, |
|
"learning_rate": 9.51108869245414e-05, |
|
"loss": 0.9348, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 4.565094871313169, |
|
"grad_norm": 0.44477733969688416, |
|
"learning_rate": 9.505413560278382e-05, |
|
"loss": 0.9295, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 4.580123990231073, |
|
"grad_norm": 0.4211689829826355, |
|
"learning_rate": 9.49970739352371e-05, |
|
"loss": 0.933, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 4.595153109148976, |
|
"grad_norm": 0.45019835233688354, |
|
"learning_rate": 9.493970231495835e-05, |
|
"loss": 0.9471, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 4.610182228066879, |
|
"grad_norm": 0.42713072896003723, |
|
"learning_rate": 9.488202113713973e-05, |
|
"loss": 0.953, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 4.625211346984783, |
|
"grad_norm": 0.41138195991516113, |
|
"learning_rate": 9.482403079910571e-05, |
|
"loss": 0.9398, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 4.640240465902687, |
|
"grad_norm": 0.42336663603782654, |
|
"learning_rate": 9.476573170031035e-05, |
|
"loss": 0.9342, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 4.65526958482059, |
|
"grad_norm": 0.4236120581626892, |
|
"learning_rate": 9.470712424233452e-05, |
|
"loss": 0.9306, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 4.670298703738493, |
|
"grad_norm": 0.47870710492134094, |
|
"learning_rate": 9.464820882888319e-05, |
|
"loss": 0.9763, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 4.685327822656397, |
|
"grad_norm": 0.44699183106422424, |
|
"learning_rate": 9.45889858657826e-05, |
|
"loss": 0.9479, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 4.7003569415743005, |
|
"grad_norm": 0.41658318042755127, |
|
"learning_rate": 9.452945576097748e-05, |
|
"loss": 0.9381, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 4.715386060492204, |
|
"grad_norm": 0.42650163173675537, |
|
"learning_rate": 9.446961892452824e-05, |
|
"loss": 0.9333, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 4.730415179410107, |
|
"grad_norm": 0.4480834901332855, |
|
"learning_rate": 9.440947576860814e-05, |
|
"loss": 0.9349, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 4.74544429832801, |
|
"grad_norm": 0.41825857758522034, |
|
"learning_rate": 9.434902670750047e-05, |
|
"loss": 0.9768, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 4.7604734172459136, |
|
"grad_norm": 0.38604798913002014, |
|
"learning_rate": 9.428827215759568e-05, |
|
"loss": 0.9374, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 4.775502536163818, |
|
"grad_norm": 0.43158042430877686, |
|
"learning_rate": 9.42272125373885e-05, |
|
"loss": 0.942, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 4.790531655081721, |
|
"grad_norm": 0.4181406497955322, |
|
"learning_rate": 9.416584826747509e-05, |
|
"loss": 0.9427, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 4.805560773999624, |
|
"grad_norm": 0.42289501428604126, |
|
"learning_rate": 9.410417977055011e-05, |
|
"loss": 0.9731, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 4.8205898929175275, |
|
"grad_norm": 0.42214304208755493, |
|
"learning_rate": 9.404220747140382e-05, |
|
"loss": 0.9236, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 4.835619011835432, |
|
"grad_norm": 0.4040350019931793, |
|
"learning_rate": 9.397993179691917e-05, |
|
"loss": 0.9478, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 4.850648130753335, |
|
"grad_norm": 0.40848028659820557, |
|
"learning_rate": 9.391735317606885e-05, |
|
"loss": 0.955, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 4.865677249671238, |
|
"grad_norm": 0.46537673473358154, |
|
"learning_rate": 9.385447203991231e-05, |
|
"loss": 0.9618, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 4.880706368589141, |
|
"grad_norm": 0.419888973236084, |
|
"learning_rate": 9.379128882159283e-05, |
|
"loss": 0.9686, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 4.895735487507045, |
|
"grad_norm": 0.3668920397758484, |
|
"learning_rate": 9.372780395633451e-05, |
|
"loss": 0.9389, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 4.910764606424948, |
|
"grad_norm": 0.3719962239265442, |
|
"learning_rate": 9.36640178814393e-05, |
|
"loss": 0.9546, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 4.925793725342852, |
|
"grad_norm": 0.3528194725513458, |
|
"learning_rate": 9.359993103628393e-05, |
|
"loss": 0.9492, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 4.940822844260755, |
|
"grad_norm": 0.4485328495502472, |
|
"learning_rate": 9.353554386231695e-05, |
|
"loss": 0.9555, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 4.955851963178659, |
|
"grad_norm": 0.4136585593223572, |
|
"learning_rate": 9.347085680305565e-05, |
|
"loss": 0.9383, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 4.970881082096562, |
|
"grad_norm": 0.4350145757198334, |
|
"learning_rate": 9.340587030408304e-05, |
|
"loss": 0.9432, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 4.985910201014466, |
|
"grad_norm": 0.5096591114997864, |
|
"learning_rate": 9.334058481304471e-05, |
|
"loss": 0.9451, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 0.6608612537384033, |
|
"learning_rate": 9.327500077964584e-05, |
|
"loss": 0.935, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 5.015029118917903, |
|
"grad_norm": 0.4970506429672241, |
|
"learning_rate": 9.320911865564802e-05, |
|
"loss": 0.8215, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 5.0300582378358065, |
|
"grad_norm": 0.4373551607131958, |
|
"learning_rate": 9.314293889486619e-05, |
|
"loss": 0.8335, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 5.045087356753711, |
|
"grad_norm": 0.47342097759246826, |
|
"learning_rate": 9.30764619531655e-05, |
|
"loss": 0.8232, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 5.060116475671614, |
|
"grad_norm": 0.4043892025947571, |
|
"learning_rate": 9.300968828845817e-05, |
|
"loss": 0.8394, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 5.075145594589517, |
|
"grad_norm": 0.5077358484268188, |
|
"learning_rate": 9.294261836070032e-05, |
|
"loss": 0.8202, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 5.0901747135074205, |
|
"grad_norm": 0.5389407277107239, |
|
"learning_rate": 9.28752526318888e-05, |
|
"loss": 0.812, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 5.105203832425324, |
|
"grad_norm": 0.5698477625846863, |
|
"learning_rate": 9.28075915660581e-05, |
|
"loss": 0.8424, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 5.120232951343228, |
|
"grad_norm": 0.47804853320121765, |
|
"learning_rate": 9.273963562927695e-05, |
|
"loss": 0.8513, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 5.135262070261131, |
|
"grad_norm": 0.5664450526237488, |
|
"learning_rate": 9.267138528964536e-05, |
|
"loss": 0.8276, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 5.150291189179034, |
|
"grad_norm": 0.5398600697517395, |
|
"learning_rate": 9.260284101729116e-05, |
|
"loss": 0.8398, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 5.165320308096938, |
|
"grad_norm": 0.5055420398712158, |
|
"learning_rate": 9.253400328436699e-05, |
|
"loss": 0.8297, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 5.180349427014841, |
|
"grad_norm": 0.4511585831642151, |
|
"learning_rate": 9.246487256504682e-05, |
|
"loss": 0.8141, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 5.195378545932745, |
|
"grad_norm": 0.5470993518829346, |
|
"learning_rate": 9.239544933552286e-05, |
|
"loss": 0.8434, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 5.210407664850648, |
|
"grad_norm": 0.4637773036956787, |
|
"learning_rate": 9.232573407400221e-05, |
|
"loss": 0.8497, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 5.225436783768552, |
|
"grad_norm": 0.4901561141014099, |
|
"learning_rate": 9.225572726070354e-05, |
|
"loss": 0.8361, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 5.240465902686455, |
|
"grad_norm": 0.531245231628418, |
|
"learning_rate": 9.218542937785384e-05, |
|
"loss": 0.8506, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 5.255495021604358, |
|
"grad_norm": 0.5206908583641052, |
|
"learning_rate": 9.211484090968506e-05, |
|
"loss": 0.8347, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 5.270524140522262, |
|
"grad_norm": 0.5049258470535278, |
|
"learning_rate": 9.204396234243076e-05, |
|
"loss": 0.8383, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 5.2855532594401655, |
|
"grad_norm": 0.5462550520896912, |
|
"learning_rate": 9.197279416432284e-05, |
|
"loss": 0.8301, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 5.300582378358069, |
|
"grad_norm": 0.5243920683860779, |
|
"learning_rate": 9.190133686558808e-05, |
|
"loss": 0.8392, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 5.315611497275972, |
|
"grad_norm": 0.5010761618614197, |
|
"learning_rate": 9.182959093844483e-05, |
|
"loss": 0.8215, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 5.330640616193875, |
|
"grad_norm": 0.5377451181411743, |
|
"learning_rate": 9.175755687709956e-05, |
|
"loss": 0.8311, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 5.3456697351117795, |
|
"grad_norm": 0.5271348357200623, |
|
"learning_rate": 9.168523517774356e-05, |
|
"loss": 0.8266, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 5.360698854029683, |
|
"grad_norm": 0.48982876539230347, |
|
"learning_rate": 9.161262633854935e-05, |
|
"loss": 0.8571, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 5.375727972947586, |
|
"grad_norm": 0.5555334687232971, |
|
"learning_rate": 9.153973085966746e-05, |
|
"loss": 0.8414, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 5.390757091865489, |
|
"grad_norm": 0.5088291764259338, |
|
"learning_rate": 9.146654924322277e-05, |
|
"loss": 0.8541, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 5.4057862107833925, |
|
"grad_norm": 0.6044062376022339, |
|
"learning_rate": 9.139308199331125e-05, |
|
"loss": 0.8553, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 5.420815329701297, |
|
"grad_norm": 0.549253523349762, |
|
"learning_rate": 9.131932961599636e-05, |
|
"loss": 0.8303, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 5.4358444486192, |
|
"grad_norm": 0.5907899737358093, |
|
"learning_rate": 9.124529261930559e-05, |
|
"loss": 0.8264, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 5.450873567537103, |
|
"grad_norm": 0.5540890097618103, |
|
"learning_rate": 9.117097151322697e-05, |
|
"loss": 0.8292, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 5.465902686455006, |
|
"grad_norm": 0.5545858144760132, |
|
"learning_rate": 9.109636680970557e-05, |
|
"loss": 0.8382, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 5.48093180537291, |
|
"grad_norm": 0.5407220721244812, |
|
"learning_rate": 9.102147902263995e-05, |
|
"loss": 0.863, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 5.495960924290814, |
|
"grad_norm": 0.5022987723350525, |
|
"learning_rate": 9.094630866787863e-05, |
|
"loss": 0.8624, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 5.510990043208717, |
|
"grad_norm": 0.5069270730018616, |
|
"learning_rate": 9.087085626321657e-05, |
|
"loss": 0.8494, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 5.52601916212662, |
|
"grad_norm": 0.586992621421814, |
|
"learning_rate": 9.07951223283915e-05, |
|
"loss": 0.8708, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 5.541048281044524, |
|
"grad_norm": 0.48386263847351074, |
|
"learning_rate": 9.071910738508048e-05, |
|
"loss": 0.8327, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 5.556077399962427, |
|
"grad_norm": 0.5556206703186035, |
|
"learning_rate": 9.064281195689621e-05, |
|
"loss": 0.8506, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 5.571106518880331, |
|
"grad_norm": 0.4873793423175812, |
|
"learning_rate": 9.056623656938344e-05, |
|
"loss": 0.8314, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 5.586135637798234, |
|
"grad_norm": 0.5752863883972168, |
|
"learning_rate": 9.048938175001535e-05, |
|
"loss": 0.8559, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 5.601164756716138, |
|
"grad_norm": 0.5001512765884399, |
|
"learning_rate": 9.041224802818999e-05, |
|
"loss": 0.8517, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 5.616193875634041, |
|
"grad_norm": 0.5640326142311096, |
|
"learning_rate": 9.033483593522651e-05, |
|
"loss": 0.8471, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 5.631222994551944, |
|
"grad_norm": 0.544611930847168, |
|
"learning_rate": 9.025714600436157e-05, |
|
"loss": 0.8314, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 5.646252113469847, |
|
"grad_norm": 0.5598495602607727, |
|
"learning_rate": 9.017917877074565e-05, |
|
"loss": 0.8454, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 5.6612812323877515, |
|
"grad_norm": 0.6049039959907532, |
|
"learning_rate": 9.010093477143942e-05, |
|
"loss": 0.8376, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 5.676310351305655, |
|
"grad_norm": 0.5953666567802429, |
|
"learning_rate": 9.002241454540992e-05, |
|
"loss": 0.8655, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 5.691339470223558, |
|
"grad_norm": 0.5012089610099792, |
|
"learning_rate": 8.994361863352696e-05, |
|
"loss": 0.8556, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 5.706368589141461, |
|
"grad_norm": 0.5770487189292908, |
|
"learning_rate": 8.986454757855938e-05, |
|
"loss": 0.8613, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 5.721397708059365, |
|
"grad_norm": 0.5475596189498901, |
|
"learning_rate": 8.978520192517121e-05, |
|
"loss": 0.8689, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 5.736426826977269, |
|
"grad_norm": 0.4748040437698364, |
|
"learning_rate": 8.970558221991807e-05, |
|
"loss": 0.8444, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 5.751455945895172, |
|
"grad_norm": 0.5324169993400574, |
|
"learning_rate": 8.962568901124327e-05, |
|
"loss": 0.8642, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 5.766485064813075, |
|
"grad_norm": 0.5375658869743347, |
|
"learning_rate": 8.954552284947411e-05, |
|
"loss": 0.8528, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 5.7815141837309785, |
|
"grad_norm": 0.5448617339134216, |
|
"learning_rate": 8.946508428681807e-05, |
|
"loss": 0.8394, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 5.796543302648882, |
|
"grad_norm": 0.5199793577194214, |
|
"learning_rate": 8.938437387735903e-05, |
|
"loss": 0.8615, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 5.811572421566786, |
|
"grad_norm": 0.5268539190292358, |
|
"learning_rate": 8.930339217705337e-05, |
|
"loss": 0.8661, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 5.826601540484689, |
|
"grad_norm": 0.5181281566619873, |
|
"learning_rate": 8.922213974372628e-05, |
|
"loss": 0.8643, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 5.841630659402592, |
|
"grad_norm": 0.5384554862976074, |
|
"learning_rate": 8.914061713706776e-05, |
|
"loss": 0.8355, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 5.856659778320496, |
|
"grad_norm": 0.5838069319725037, |
|
"learning_rate": 8.905882491862888e-05, |
|
"loss": 0.8723, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 5.8716888972384, |
|
"grad_norm": 0.5165135860443115, |
|
"learning_rate": 8.897676365181784e-05, |
|
"loss": 0.8298, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 5.886718016156303, |
|
"grad_norm": 0.5289579033851624, |
|
"learning_rate": 8.889443390189618e-05, |
|
"loss": 0.8664, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 5.901747135074206, |
|
"grad_norm": 0.4891420304775238, |
|
"learning_rate": 8.88118362359748e-05, |
|
"loss": 0.8503, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 5.91677625399211, |
|
"grad_norm": 0.49529027938842773, |
|
"learning_rate": 8.872897122301004e-05, |
|
"loss": 0.8497, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 5.931805372910013, |
|
"grad_norm": 0.6124776601791382, |
|
"learning_rate": 8.864583943379987e-05, |
|
"loss": 0.8829, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 5.946834491827916, |
|
"grad_norm": 0.5730892419815063, |
|
"learning_rate": 8.856244144097988e-05, |
|
"loss": 0.8372, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 5.96186361074582, |
|
"grad_norm": 0.5806572437286377, |
|
"learning_rate": 8.847877781901928e-05, |
|
"loss": 0.8661, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 5.9768927296637235, |
|
"grad_norm": 0.5184414386749268, |
|
"learning_rate": 8.83948491442171e-05, |
|
"loss": 0.8747, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 5.991921848581627, |
|
"grad_norm": 0.5810568332672119, |
|
"learning_rate": 8.831065599469806e-05, |
|
"loss": 0.8747, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 6.006011647567162, |
|
"grad_norm": 0.5326306819915771, |
|
"learning_rate": 8.822619895040868e-05, |
|
"loss": 0.7988, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 6.021040766485065, |
|
"grad_norm": 0.5372363924980164, |
|
"learning_rate": 8.814147859311332e-05, |
|
"loss": 0.712, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 6.036069885402968, |
|
"grad_norm": 0.6200835108757019, |
|
"learning_rate": 8.805649550639004e-05, |
|
"loss": 0.7213, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 6.051099004320871, |
|
"grad_norm": 0.5874983072280884, |
|
"learning_rate": 8.797125027562665e-05, |
|
"loss": 0.7096, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 6.066128123238775, |
|
"grad_norm": 0.6422827243804932, |
|
"learning_rate": 8.788574348801675e-05, |
|
"loss": 0.7223, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 6.081157242156679, |
|
"grad_norm": 0.641160786151886, |
|
"learning_rate": 8.779997573255553e-05, |
|
"loss": 0.7231, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 6.096186361074582, |
|
"grad_norm": 0.7293818593025208, |
|
"learning_rate": 8.771394760003593e-05, |
|
"loss": 0.7092, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 6.111215479992485, |
|
"grad_norm": 0.60944664478302, |
|
"learning_rate": 8.762765968304431e-05, |
|
"loss": 0.7203, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 6.126244598910389, |
|
"grad_norm": 0.6189725399017334, |
|
"learning_rate": 8.754111257595657e-05, |
|
"loss": 0.7136, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 6.141273717828292, |
|
"grad_norm": 0.6322532296180725, |
|
"learning_rate": 8.745430687493396e-05, |
|
"loss": 0.7382, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 6.156302836746196, |
|
"grad_norm": 0.6236686706542969, |
|
"learning_rate": 8.736724317791902e-05, |
|
"loss": 0.7221, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 6.171331955664099, |
|
"grad_norm": 0.5708134174346924, |
|
"learning_rate": 8.727992208463143e-05, |
|
"loss": 0.7205, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 6.186361074582003, |
|
"grad_norm": 0.6412458419799805, |
|
"learning_rate": 8.719234419656387e-05, |
|
"loss": 0.7306, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 6.201390193499906, |
|
"grad_norm": 0.6535741686820984, |
|
"learning_rate": 8.710451011697793e-05, |
|
"loss": 0.7169, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 6.216419312417809, |
|
"grad_norm": 0.6490382552146912, |
|
"learning_rate": 8.701642045089992e-05, |
|
"loss": 0.7145, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 6.231448431335713, |
|
"grad_norm": 0.7014051079750061, |
|
"learning_rate": 8.692807580511667e-05, |
|
"loss": 0.7569, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 6.2464775502536165, |
|
"grad_norm": 0.7195674180984497, |
|
"learning_rate": 8.683947678817139e-05, |
|
"loss": 0.7244, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 6.26150666917152, |
|
"grad_norm": 0.6836762428283691, |
|
"learning_rate": 8.675062401035952e-05, |
|
"loss": 0.7303, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 6.276535788089423, |
|
"grad_norm": 0.6135929822921753, |
|
"learning_rate": 8.666151808372439e-05, |
|
"loss": 0.7179, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 6.291564907007326, |
|
"grad_norm": 0.6589913368225098, |
|
"learning_rate": 8.657215962205319e-05, |
|
"loss": 0.7455, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 6.30659402592523, |
|
"grad_norm": 0.6406304836273193, |
|
"learning_rate": 8.648254924087254e-05, |
|
"loss": 0.7496, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 6.321623144843134, |
|
"grad_norm": 0.6410109400749207, |
|
"learning_rate": 8.639268755744447e-05, |
|
"loss": 0.7355, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 6.336652263761037, |
|
"grad_norm": 0.6654278039932251, |
|
"learning_rate": 8.630257519076196e-05, |
|
"loss": 0.7367, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 6.35168138267894, |
|
"grad_norm": 0.588206946849823, |
|
"learning_rate": 8.621221276154481e-05, |
|
"loss": 0.7255, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 6.3667105015968435, |
|
"grad_norm": 0.633627712726593, |
|
"learning_rate": 8.612160089223529e-05, |
|
"loss": 0.7248, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 6.381739620514748, |
|
"grad_norm": 0.6771560311317444, |
|
"learning_rate": 8.603074020699393e-05, |
|
"loss": 0.7393, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 6.396768739432651, |
|
"grad_norm": 0.682534396648407, |
|
"learning_rate": 8.593963133169514e-05, |
|
"loss": 0.7406, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 6.411797858350554, |
|
"grad_norm": 0.6308305859565735, |
|
"learning_rate": 8.584827489392293e-05, |
|
"loss": 0.751, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 6.426826977268457, |
|
"grad_norm": 0.7026039958000183, |
|
"learning_rate": 8.575667152296665e-05, |
|
"loss": 0.7335, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 6.441856096186361, |
|
"grad_norm": 0.6078832149505615, |
|
"learning_rate": 8.566482184981651e-05, |
|
"loss": 0.752, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 6.456885215104265, |
|
"grad_norm": 0.6271105408668518, |
|
"learning_rate": 8.557272650715939e-05, |
|
"loss": 0.7436, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 6.471914334022168, |
|
"grad_norm": 0.7435263991355896, |
|
"learning_rate": 8.54803861293744e-05, |
|
"loss": 0.7516, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 6.486943452940071, |
|
"grad_norm": 0.6983492970466614, |
|
"learning_rate": 8.538780135252844e-05, |
|
"loss": 0.7369, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 6.501972571857975, |
|
"grad_norm": 0.6141520738601685, |
|
"learning_rate": 8.529497281437204e-05, |
|
"loss": 0.7415, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 6.517001690775878, |
|
"grad_norm": 0.580833375453949, |
|
"learning_rate": 8.520190115433473e-05, |
|
"loss": 0.7542, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 6.532030809693782, |
|
"grad_norm": 0.6651113033294678, |
|
"learning_rate": 8.510858701352076e-05, |
|
"loss": 0.7251, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 6.547059928611685, |
|
"grad_norm": 0.676468551158905, |
|
"learning_rate": 8.501503103470466e-05, |
|
"loss": 0.7377, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 6.5620890475295885, |
|
"grad_norm": 0.6262651085853577, |
|
"learning_rate": 8.492123386232677e-05, |
|
"loss": 0.7158, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 6.577118166447492, |
|
"grad_norm": 0.7301998138427734, |
|
"learning_rate": 8.482719614248894e-05, |
|
"loss": 0.7483, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 6.592147285365395, |
|
"grad_norm": 0.602796733379364, |
|
"learning_rate": 8.473291852294987e-05, |
|
"loss": 0.7332, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 6.607176404283299, |
|
"grad_norm": 0.6329184770584106, |
|
"learning_rate": 8.463840165312082e-05, |
|
"loss": 0.7518, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 6.6222055232012025, |
|
"grad_norm": 0.7019734382629395, |
|
"learning_rate": 8.454364618406106e-05, |
|
"loss": 0.7702, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 6.637234642119106, |
|
"grad_norm": 0.6546521782875061, |
|
"learning_rate": 8.444865276847338e-05, |
|
"loss": 0.751, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 6.652263761037009, |
|
"grad_norm": 0.7014687657356262, |
|
"learning_rate": 8.435342206069965e-05, |
|
"loss": 0.7662, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 6.667292879954912, |
|
"grad_norm": 0.6677362322807312, |
|
"learning_rate": 8.425795471671625e-05, |
|
"loss": 0.74, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 6.682321998872816, |
|
"grad_norm": 0.6421080231666565, |
|
"learning_rate": 8.416225139412959e-05, |
|
"loss": 0.7491, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 6.69735111779072, |
|
"grad_norm": 0.6495652794837952, |
|
"learning_rate": 8.406631275217156e-05, |
|
"loss": 0.7612, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 6.712380236708623, |
|
"grad_norm": 0.7310630679130554, |
|
"learning_rate": 8.397013945169501e-05, |
|
"loss": 0.7475, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 6.727409355626526, |
|
"grad_norm": 0.6594589948654175, |
|
"learning_rate": 8.387373215516918e-05, |
|
"loss": 0.7295, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 6.7424384745444295, |
|
"grad_norm": 0.6998351216316223, |
|
"learning_rate": 8.377709152667512e-05, |
|
"loss": 0.756, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 6.757467593462334, |
|
"grad_norm": 0.6579599380493164, |
|
"learning_rate": 8.368021823190116e-05, |
|
"loss": 0.7256, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 6.772496712380237, |
|
"grad_norm": 0.6116402745246887, |
|
"learning_rate": 8.358311293813832e-05, |
|
"loss": 0.7358, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 6.78752583129814, |
|
"grad_norm": 0.6876879930496216, |
|
"learning_rate": 8.348577631427566e-05, |
|
"loss": 0.7568, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 6.802554950216043, |
|
"grad_norm": 0.6426005363464355, |
|
"learning_rate": 8.33882090307957e-05, |
|
"loss": 0.7563, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 6.817584069133947, |
|
"grad_norm": 0.6187247633934021, |
|
"learning_rate": 8.329041175976987e-05, |
|
"loss": 0.7367, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 6.832613188051851, |
|
"grad_norm": 0.6543039679527283, |
|
"learning_rate": 8.319238517485375e-05, |
|
"loss": 0.7577, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 6.847642306969754, |
|
"grad_norm": 0.6411317586898804, |
|
"learning_rate": 8.309412995128256e-05, |
|
"loss": 0.7614, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 6.862671425887657, |
|
"grad_norm": 0.7125687599182129, |
|
"learning_rate": 8.299564676586638e-05, |
|
"loss": 0.7572, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 6.877700544805561, |
|
"grad_norm": 0.7412214875221252, |
|
"learning_rate": 8.289693629698564e-05, |
|
"loss": 0.7724, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 6.892729663723464, |
|
"grad_norm": 0.6838482022285461, |
|
"learning_rate": 8.279799922458629e-05, |
|
"loss": 0.7428, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 6.907758782641368, |
|
"grad_norm": 0.6079447269439697, |
|
"learning_rate": 8.269883623017522e-05, |
|
"loss": 0.7515, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 6.922787901559271, |
|
"grad_norm": 0.7181859612464905, |
|
"learning_rate": 8.259944799681555e-05, |
|
"loss": 0.7472, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 6.9378170204771745, |
|
"grad_norm": 0.7185594439506531, |
|
"learning_rate": 8.249983520912187e-05, |
|
"loss": 0.7582, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 6.952846139395078, |
|
"grad_norm": 0.7397907972335815, |
|
"learning_rate": 8.239999855325563e-05, |
|
"loss": 0.7578, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 6.967875258312981, |
|
"grad_norm": 0.6544892191886902, |
|
"learning_rate": 8.229993871692028e-05, |
|
"loss": 0.7511, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 6.982904377230885, |
|
"grad_norm": 0.7269999384880066, |
|
"learning_rate": 8.219965638935662e-05, |
|
"loss": 0.7557, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 6.9979334961487885, |
|
"grad_norm": 0.7143056392669678, |
|
"learning_rate": 8.209915226133807e-05, |
|
"loss": 0.7603, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 7.012023295134322, |
|
"grad_norm": 0.740738034248352, |
|
"learning_rate": 8.199842702516583e-05, |
|
"loss": 0.6384, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 7.027052414052227, |
|
"grad_norm": 0.7142441868782043, |
|
"learning_rate": 8.189748137466417e-05, |
|
"loss": 0.6018, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 7.04208153297013, |
|
"grad_norm": 0.8026095628738403, |
|
"learning_rate": 8.179631600517565e-05, |
|
"loss": 0.6187, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 7.057110651888033, |
|
"grad_norm": 0.8209463953971863, |
|
"learning_rate": 8.169493161355633e-05, |
|
"loss": 0.6178, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 7.072139770805936, |
|
"grad_norm": 0.7156078219413757, |
|
"learning_rate": 8.159332889817088e-05, |
|
"loss": 0.6223, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 7.08716888972384, |
|
"grad_norm": 0.7837380170822144, |
|
"learning_rate": 8.149150855888794e-05, |
|
"loss": 0.603, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 7.102198008641744, |
|
"grad_norm": 0.7317357063293457, |
|
"learning_rate": 8.138947129707517e-05, |
|
"loss": 0.6183, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 7.117227127559647, |
|
"grad_norm": 0.6778579950332642, |
|
"learning_rate": 8.128721781559443e-05, |
|
"loss": 0.6123, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 7.13225624647755, |
|
"grad_norm": 0.6829363703727722, |
|
"learning_rate": 8.118474881879701e-05, |
|
"loss": 0.6111, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 7.147285365395454, |
|
"grad_norm": 0.7064921855926514, |
|
"learning_rate": 8.108206501251866e-05, |
|
"loss": 0.6142, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 7.162314484313357, |
|
"grad_norm": 0.7147718071937561, |
|
"learning_rate": 8.097916710407492e-05, |
|
"loss": 0.6128, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 7.177343603231261, |
|
"grad_norm": 0.7428337335586548, |
|
"learning_rate": 8.0876055802256e-05, |
|
"loss": 0.6087, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 7.192372722149164, |
|
"grad_norm": 0.7002803087234497, |
|
"learning_rate": 8.077273181732207e-05, |
|
"loss": 0.6421, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 7.2074018410670675, |
|
"grad_norm": 0.7221034169197083, |
|
"learning_rate": 8.066919586099834e-05, |
|
"loss": 0.6159, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 7.222430959984971, |
|
"grad_norm": 0.7155001759529114, |
|
"learning_rate": 8.056544864647015e-05, |
|
"loss": 0.6227, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 7.237460078902874, |
|
"grad_norm": 0.828462541103363, |
|
"learning_rate": 8.046149088837802e-05, |
|
"loss": 0.6249, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 7.252489197820778, |
|
"grad_norm": 0.7177339792251587, |
|
"learning_rate": 8.035732330281273e-05, |
|
"loss": 0.6205, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 7.267518316738681, |
|
"grad_norm": 0.7466073632240295, |
|
"learning_rate": 8.025294660731048e-05, |
|
"loss": 0.6225, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 7.282547435656585, |
|
"grad_norm": 0.7658254504203796, |
|
"learning_rate": 8.014836152084784e-05, |
|
"loss": 0.6259, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 7.297576554574488, |
|
"grad_norm": 0.7269898653030396, |
|
"learning_rate": 8.00435687638368e-05, |
|
"loss": 0.6228, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 7.312605673492391, |
|
"grad_norm": 0.8240427374839783, |
|
"learning_rate": 7.993856905811991e-05, |
|
"loss": 0.6242, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 7.327634792410295, |
|
"grad_norm": 0.7971922755241394, |
|
"learning_rate": 7.983336312696522e-05, |
|
"loss": 0.6272, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 7.342663911328199, |
|
"grad_norm": 0.7452378869056702, |
|
"learning_rate": 7.972795169506129e-05, |
|
"loss": 0.6214, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 7.357693030246102, |
|
"grad_norm": 0.7922284603118896, |
|
"learning_rate": 7.962233548851227e-05, |
|
"loss": 0.6257, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 7.372722149164005, |
|
"grad_norm": 0.8231662511825562, |
|
"learning_rate": 7.951651523483283e-05, |
|
"loss": 0.6288, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 7.387751268081908, |
|
"grad_norm": 0.7604002952575684, |
|
"learning_rate": 7.941049166294319e-05, |
|
"loss": 0.6416, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 7.402780386999812, |
|
"grad_norm": 0.7322626709938049, |
|
"learning_rate": 7.930426550316406e-05, |
|
"loss": 0.628, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 7.417809505917716, |
|
"grad_norm": 0.7688371539115906, |
|
"learning_rate": 7.919783748721168e-05, |
|
"loss": 0.6245, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 7.432838624835619, |
|
"grad_norm": 0.8524195551872253, |
|
"learning_rate": 7.909120834819268e-05, |
|
"loss": 0.6431, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 7.447867743753522, |
|
"grad_norm": 0.8562901020050049, |
|
"learning_rate": 7.898437882059913e-05, |
|
"loss": 0.6291, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 7.462896862671426, |
|
"grad_norm": 0.7663971185684204, |
|
"learning_rate": 7.887734964030337e-05, |
|
"loss": 0.6361, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 7.47792598158933, |
|
"grad_norm": 0.7779290676116943, |
|
"learning_rate": 7.87701215445531e-05, |
|
"loss": 0.6321, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 7.492955100507233, |
|
"grad_norm": 0.8450044393539429, |
|
"learning_rate": 7.86626952719661e-05, |
|
"loss": 0.6554, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 7.507984219425136, |
|
"grad_norm": 0.7660729885101318, |
|
"learning_rate": 7.855507156252535e-05, |
|
"loss": 0.6546, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 7.5230133383430395, |
|
"grad_norm": 0.9639895558357239, |
|
"learning_rate": 7.844725115757375e-05, |
|
"loss": 0.6388, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 7.538042457260943, |
|
"grad_norm": 0.8670216798782349, |
|
"learning_rate": 7.833923479980914e-05, |
|
"loss": 0.6489, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 7.553071576178846, |
|
"grad_norm": 0.7850314974784851, |
|
"learning_rate": 7.823102323327911e-05, |
|
"loss": 0.6397, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 7.56810069509675, |
|
"grad_norm": 0.7203473448753357, |
|
"learning_rate": 7.812261720337594e-05, |
|
"loss": 0.6466, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 7.5831298140146535, |
|
"grad_norm": 0.7159662246704102, |
|
"learning_rate": 7.801401745683143e-05, |
|
"loss": 0.6336, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 7.598158932932557, |
|
"grad_norm": 0.8092458844184875, |
|
"learning_rate": 7.79052247417117e-05, |
|
"loss": 0.6415, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 7.61318805185046, |
|
"grad_norm": 0.7300180196762085, |
|
"learning_rate": 7.779623980741214e-05, |
|
"loss": 0.6469, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 7.628217170768364, |
|
"grad_norm": 0.8448249697685242, |
|
"learning_rate": 7.768706340465219e-05, |
|
"loss": 0.6281, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 7.643246289686267, |
|
"grad_norm": 0.7753276824951172, |
|
"learning_rate": 7.757769628547018e-05, |
|
"loss": 0.644, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 7.658275408604171, |
|
"grad_norm": 0.7004479765892029, |
|
"learning_rate": 7.746813920321816e-05, |
|
"loss": 0.6349, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 7.673304527522074, |
|
"grad_norm": 0.7119005918502808, |
|
"learning_rate": 7.735839291255667e-05, |
|
"loss": 0.6477, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 7.688333646439977, |
|
"grad_norm": 0.8026734590530396, |
|
"learning_rate": 7.724845816944961e-05, |
|
"loss": 0.6302, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 7.7033627653578804, |
|
"grad_norm": 0.7971638441085815, |
|
"learning_rate": 7.713833573115894e-05, |
|
"loss": 0.642, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 7.718391884275785, |
|
"grad_norm": 0.7363801598548889, |
|
"learning_rate": 7.70280263562396e-05, |
|
"loss": 0.6509, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 7.733421003193688, |
|
"grad_norm": 0.7832568883895874, |
|
"learning_rate": 7.691753080453412e-05, |
|
"loss": 0.6517, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 7.748450122111591, |
|
"grad_norm": 0.7115653157234192, |
|
"learning_rate": 7.680684983716753e-05, |
|
"loss": 0.6484, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 7.763479241029494, |
|
"grad_norm": 0.7662774324417114, |
|
"learning_rate": 7.6695984216542e-05, |
|
"loss": 0.6496, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 7.7785083599473985, |
|
"grad_norm": 0.7544398307800293, |
|
"learning_rate": 7.658493470633173e-05, |
|
"loss": 0.6394, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 7.793537478865302, |
|
"grad_norm": 0.7812057733535767, |
|
"learning_rate": 7.647370207147748e-05, |
|
"loss": 0.6494, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 7.808566597783205, |
|
"grad_norm": 0.7722028493881226, |
|
"learning_rate": 7.636228707818154e-05, |
|
"loss": 0.6395, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 7.823595716701108, |
|
"grad_norm": 0.776189923286438, |
|
"learning_rate": 7.625069049390227e-05, |
|
"loss": 0.6474, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 7.838624835619012, |
|
"grad_norm": 0.6927589178085327, |
|
"learning_rate": 7.613891308734894e-05, |
|
"loss": 0.6419, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 7.853653954536915, |
|
"grad_norm": 0.8120152354240417, |
|
"learning_rate": 7.60269556284763e-05, |
|
"loss": 0.6638, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 7.868683073454819, |
|
"grad_norm": 0.8518467545509338, |
|
"learning_rate": 7.59148188884794e-05, |
|
"loss": 0.6546, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 7.883712192372722, |
|
"grad_norm": 0.8371894359588623, |
|
"learning_rate": 7.580250363978824e-05, |
|
"loss": 0.6567, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 7.8987413112906255, |
|
"grad_norm": 0.8003565669059753, |
|
"learning_rate": 7.569001065606238e-05, |
|
"loss": 0.6443, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 7.913770430208529, |
|
"grad_norm": 0.8672810196876526, |
|
"learning_rate": 7.557734071218576e-05, |
|
"loss": 0.6559, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 7.928799549126433, |
|
"grad_norm": 0.7518348097801208, |
|
"learning_rate": 7.546449458426117e-05, |
|
"loss": 0.6579, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 7.943828668044336, |
|
"grad_norm": 0.8424391150474548, |
|
"learning_rate": 7.535147304960508e-05, |
|
"loss": 0.6588, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 7.9588577869622394, |
|
"grad_norm": 0.7776015996932983, |
|
"learning_rate": 7.52382768867422e-05, |
|
"loss": 0.6516, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 7.973886905880143, |
|
"grad_norm": 0.8192471861839294, |
|
"learning_rate": 7.512490687540009e-05, |
|
"loss": 0.6686, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 7.988916024798046, |
|
"grad_norm": 0.7316805720329285, |
|
"learning_rate": 7.501136379650388e-05, |
|
"loss": 0.6505, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 8.00300582378358, |
|
"grad_norm": 0.8020321726799011, |
|
"learning_rate": 7.489764843217082e-05, |
|
"loss": 0.6468, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 8.018034942701485, |
|
"grad_norm": 0.7429752349853516, |
|
"learning_rate": 7.478376156570489e-05, |
|
"loss": 0.5209, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 8.033064061619388, |
|
"grad_norm": 0.7338524460792542, |
|
"learning_rate": 7.466970398159145e-05, |
|
"loss": 0.5215, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 8.048093180537292, |
|
"grad_norm": 0.7771674990653992, |
|
"learning_rate": 7.45554764654918e-05, |
|
"loss": 0.5066, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 8.063122299455195, |
|
"grad_norm": 0.7496100068092346, |
|
"learning_rate": 7.444107980423778e-05, |
|
"loss": 0.5101, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 8.078151418373098, |
|
"grad_norm": 0.8719698786735535, |
|
"learning_rate": 7.432651478582636e-05, |
|
"loss": 0.513, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 8.093180537291001, |
|
"grad_norm": 0.706078052520752, |
|
"learning_rate": 7.42117821994142e-05, |
|
"loss": 0.5185, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 8.108209656208905, |
|
"grad_norm": 0.7622345685958862, |
|
"learning_rate": 7.409688283531222e-05, |
|
"loss": 0.5162, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 8.123238775126808, |
|
"grad_norm": 0.7656405568122864, |
|
"learning_rate": 7.398181748498015e-05, |
|
"loss": 0.5137, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 8.138267894044711, |
|
"grad_norm": 0.8089895248413086, |
|
"learning_rate": 7.386658694102103e-05, |
|
"loss": 0.5006, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 8.153297012962614, |
|
"grad_norm": 0.7622844576835632, |
|
"learning_rate": 7.375119199717591e-05, |
|
"loss": 0.5224, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 8.16832613188052, |
|
"grad_norm": 0.8785136342048645, |
|
"learning_rate": 7.363563344831818e-05, |
|
"loss": 0.5277, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 8.183355250798423, |
|
"grad_norm": 0.8507887721061707, |
|
"learning_rate": 7.351991209044821e-05, |
|
"loss": 0.5203, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 8.198384369716326, |
|
"grad_norm": 0.9602698683738708, |
|
"learning_rate": 7.340402872068789e-05, |
|
"loss": 0.5186, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 8.21341348863423, |
|
"grad_norm": 0.8880749344825745, |
|
"learning_rate": 7.328798413727503e-05, |
|
"loss": 0.5175, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 8.228442607552132, |
|
"grad_norm": 0.8679527640342712, |
|
"learning_rate": 7.317177913955795e-05, |
|
"loss": 0.513, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 8.243471726470036, |
|
"grad_norm": 0.7859882116317749, |
|
"learning_rate": 7.305541452798997e-05, |
|
"loss": 0.5252, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 8.258500845387939, |
|
"grad_norm": 0.8226519227027893, |
|
"learning_rate": 7.293889110412387e-05, |
|
"loss": 0.5211, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 8.273529964305842, |
|
"grad_norm": 0.8628718256950378, |
|
"learning_rate": 7.282220967060633e-05, |
|
"loss": 0.5294, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 8.288559083223745, |
|
"grad_norm": 0.9453558325767517, |
|
"learning_rate": 7.270537103117252e-05, |
|
"loss": 0.5238, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 8.303588202141649, |
|
"grad_norm": 0.9046574831008911, |
|
"learning_rate": 7.258837599064043e-05, |
|
"loss": 0.5186, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 8.318617321059552, |
|
"grad_norm": 0.9415176510810852, |
|
"learning_rate": 7.24712253549054e-05, |
|
"loss": 0.5282, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 8.333646439977457, |
|
"grad_norm": 0.8018948435783386, |
|
"learning_rate": 7.235391993093456e-05, |
|
"loss": 0.5264, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 8.34867555889536, |
|
"grad_norm": 0.818480908870697, |
|
"learning_rate": 7.22364605267613e-05, |
|
"loss": 0.5272, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 8.363704677813264, |
|
"grad_norm": 0.8961235284805298, |
|
"learning_rate": 7.211884795147958e-05, |
|
"loss": 0.5373, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 8.378733796731167, |
|
"grad_norm": 0.8245147466659546, |
|
"learning_rate": 7.200108301523854e-05, |
|
"loss": 0.5423, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 8.39376291564907, |
|
"grad_norm": 0.8225317001342773, |
|
"learning_rate": 7.188316652923677e-05, |
|
"loss": 0.5374, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 8.408792034566973, |
|
"grad_norm": 0.9353516697883606, |
|
"learning_rate": 7.176509930571682e-05, |
|
"loss": 0.5418, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 8.423821153484877, |
|
"grad_norm": 0.9062713384628296, |
|
"learning_rate": 7.16468821579595e-05, |
|
"loss": 0.5508, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 8.43885027240278, |
|
"grad_norm": 0.8618881106376648, |
|
"learning_rate": 7.152851590027843e-05, |
|
"loss": 0.5424, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 8.453879391320683, |
|
"grad_norm": 0.8350569009780884, |
|
"learning_rate": 7.141000134801425e-05, |
|
"loss": 0.5433, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 8.468908510238588, |
|
"grad_norm": 0.8575078845024109, |
|
"learning_rate": 7.129133931752914e-05, |
|
"loss": 0.5459, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 8.483937629156491, |
|
"grad_norm": 0.869219183921814, |
|
"learning_rate": 7.117253062620118e-05, |
|
"loss": 0.5397, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 8.498966748074395, |
|
"grad_norm": 0.900360643863678, |
|
"learning_rate": 7.105357609241863e-05, |
|
"loss": 0.5435, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 8.513995866992298, |
|
"grad_norm": 0.9262248277664185, |
|
"learning_rate": 7.093447653557441e-05, |
|
"loss": 0.5462, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 8.529024985910201, |
|
"grad_norm": 0.9586583971977234, |
|
"learning_rate": 7.081523277606035e-05, |
|
"loss": 0.5386, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 8.544054104828104, |
|
"grad_norm": 0.8671521544456482, |
|
"learning_rate": 7.069584563526166e-05, |
|
"loss": 0.539, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 8.559083223746008, |
|
"grad_norm": 0.8206884860992432, |
|
"learning_rate": 7.057631593555111e-05, |
|
"loss": 0.5389, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 8.574112342663911, |
|
"grad_norm": 0.8640275597572327, |
|
"learning_rate": 7.045664450028352e-05, |
|
"loss": 0.5443, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 8.589141461581814, |
|
"grad_norm": 0.8697555661201477, |
|
"learning_rate": 7.033683215379002e-05, |
|
"loss": 0.5488, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 8.604170580499718, |
|
"grad_norm": 0.9721740484237671, |
|
"learning_rate": 7.021687972137235e-05, |
|
"loss": 0.5474, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 8.61919969941762, |
|
"grad_norm": 0.895819902420044, |
|
"learning_rate": 7.009678802929724e-05, |
|
"loss": 0.5504, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 8.634228818335526, |
|
"grad_norm": 1.060189962387085, |
|
"learning_rate": 6.997655790479061e-05, |
|
"loss": 0.5469, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 8.649257937253429, |
|
"grad_norm": 0.955331563949585, |
|
"learning_rate": 6.985619017603207e-05, |
|
"loss": 0.5491, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 8.664287056171332, |
|
"grad_norm": 0.9543823599815369, |
|
"learning_rate": 6.973568567214894e-05, |
|
"loss": 0.5549, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 8.679316175089236, |
|
"grad_norm": 0.8880019187927246, |
|
"learning_rate": 6.961504522321076e-05, |
|
"loss": 0.5466, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 8.694345294007139, |
|
"grad_norm": 0.8980219960212708, |
|
"learning_rate": 6.949426966022354e-05, |
|
"loss": 0.5321, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 8.709374412925042, |
|
"grad_norm": 0.9821533560752869, |
|
"learning_rate": 6.937335981512389e-05, |
|
"loss": 0.5466, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 8.724403531842945, |
|
"grad_norm": 0.9177353978157043, |
|
"learning_rate": 6.925231652077348e-05, |
|
"loss": 0.5568, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 8.739432650760849, |
|
"grad_norm": 0.9436571002006531, |
|
"learning_rate": 6.913114061095319e-05, |
|
"loss": 0.5537, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 8.754461769678752, |
|
"grad_norm": 0.8605087995529175, |
|
"learning_rate": 6.900983292035739e-05, |
|
"loss": 0.5456, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 8.769490888596657, |
|
"grad_norm": 0.9178728461265564, |
|
"learning_rate": 6.888839428458818e-05, |
|
"loss": 0.5522, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 8.78452000751456, |
|
"grad_norm": 0.8443792462348938, |
|
"learning_rate": 6.876682554014967e-05, |
|
"loss": 0.5465, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 8.799549126432463, |
|
"grad_norm": 0.8694719076156616, |
|
"learning_rate": 6.86451275244422e-05, |
|
"loss": 0.5516, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 8.814578245350367, |
|
"grad_norm": 0.8430178165435791, |
|
"learning_rate": 6.852330107575652e-05, |
|
"loss": 0.549, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 8.82960736426827, |
|
"grad_norm": 0.8651490211486816, |
|
"learning_rate": 6.840134703326815e-05, |
|
"loss": 0.5525, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 8.844636483186173, |
|
"grad_norm": 0.7867377400398254, |
|
"learning_rate": 6.827926623703142e-05, |
|
"loss": 0.5594, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 8.859665602104076, |
|
"grad_norm": 0.9743750691413879, |
|
"learning_rate": 6.815705952797382e-05, |
|
"loss": 0.5617, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 8.87469472102198, |
|
"grad_norm": 0.8857339024543762, |
|
"learning_rate": 6.80347277478902e-05, |
|
"loss": 0.5559, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 8.889723839939883, |
|
"grad_norm": 0.9169685244560242, |
|
"learning_rate": 6.791227173943684e-05, |
|
"loss": 0.5473, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 8.904752958857786, |
|
"grad_norm": 1.0672627687454224, |
|
"learning_rate": 6.778969234612584e-05, |
|
"loss": 0.5532, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 8.91978207777569, |
|
"grad_norm": 0.9694510698318481, |
|
"learning_rate": 6.766699041231913e-05, |
|
"loss": 0.5541, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 8.934811196693595, |
|
"grad_norm": 0.940804123878479, |
|
"learning_rate": 6.754416678322281e-05, |
|
"loss": 0.5569, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 8.949840315611498, |
|
"grad_norm": 0.9347053170204163, |
|
"learning_rate": 6.74212223048812e-05, |
|
"loss": 0.5614, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 8.964869434529401, |
|
"grad_norm": 0.8529021739959717, |
|
"learning_rate": 6.729815782417105e-05, |
|
"loss": 0.5438, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 8.979898553447304, |
|
"grad_norm": 0.9158792495727539, |
|
"learning_rate": 6.717497418879579e-05, |
|
"loss": 0.5687, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 8.994927672365208, |
|
"grad_norm": 0.8642351627349854, |
|
"learning_rate": 6.705167224727955e-05, |
|
"loss": 0.5508, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 9.009017471350742, |
|
"grad_norm": 1.036657452583313, |
|
"learning_rate": 6.692825284896142e-05, |
|
"loss": 0.496, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 9.024046590268645, |
|
"grad_norm": 1.0688594579696655, |
|
"learning_rate": 6.680471684398957e-05, |
|
"loss": 0.4279, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 9.039075709186548, |
|
"grad_norm": 0.9282298684120178, |
|
"learning_rate": 6.668106508331539e-05, |
|
"loss": 0.4258, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 9.054104828104453, |
|
"grad_norm": 0.8562738299369812, |
|
"learning_rate": 6.655729841868758e-05, |
|
"loss": 0.4266, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 9.069133947022356, |
|
"grad_norm": 0.9267016649246216, |
|
"learning_rate": 6.643341770264642e-05, |
|
"loss": 0.4253, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 9.08416306594026, |
|
"grad_norm": 0.838796079158783, |
|
"learning_rate": 6.630942378851774e-05, |
|
"loss": 0.4209, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 9.099192184858163, |
|
"grad_norm": 1.0836501121520996, |
|
"learning_rate": 6.618531753040712e-05, |
|
"loss": 0.4319, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 9.114221303776066, |
|
"grad_norm": 0.912151038646698, |
|
"learning_rate": 6.606109978319404e-05, |
|
"loss": 0.4242, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 9.12925042269397, |
|
"grad_norm": 0.9484944939613342, |
|
"learning_rate": 6.593677140252588e-05, |
|
"loss": 0.4275, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 9.144279541611873, |
|
"grad_norm": 0.8877925276756287, |
|
"learning_rate": 6.581233324481216e-05, |
|
"loss": 0.4372, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 9.159308660529776, |
|
"grad_norm": 0.9061231017112732, |
|
"learning_rate": 6.568778616721853e-05, |
|
"loss": 0.4309, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 9.17433777944768, |
|
"grad_norm": 0.9550976753234863, |
|
"learning_rate": 6.556313102766094e-05, |
|
"loss": 0.4344, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 9.189366898365583, |
|
"grad_norm": 0.9908791780471802, |
|
"learning_rate": 6.543836868479968e-05, |
|
"loss": 0.4366, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 9.204396017283488, |
|
"grad_norm": 1.0337473154067993, |
|
"learning_rate": 6.531349999803353e-05, |
|
"loss": 0.4357, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 9.21942513620139, |
|
"grad_norm": 0.9019971489906311, |
|
"learning_rate": 6.518852582749373e-05, |
|
"loss": 0.439, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 9.234454255119294, |
|
"grad_norm": 0.9498554468154907, |
|
"learning_rate": 6.506344703403819e-05, |
|
"loss": 0.4348, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 9.249483374037197, |
|
"grad_norm": 0.9589983820915222, |
|
"learning_rate": 6.493826447924541e-05, |
|
"loss": 0.4512, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 9.2645124929551, |
|
"grad_norm": 0.9420648217201233, |
|
"learning_rate": 6.481297902540875e-05, |
|
"loss": 0.4415, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 9.279541611873004, |
|
"grad_norm": 0.8353439569473267, |
|
"learning_rate": 6.468759153553022e-05, |
|
"loss": 0.4482, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 9.294570730790907, |
|
"grad_norm": 0.9372383952140808, |
|
"learning_rate": 6.456210287331483e-05, |
|
"loss": 0.4401, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 9.30959984970881, |
|
"grad_norm": 1.0183303356170654, |
|
"learning_rate": 6.443651390316437e-05, |
|
"loss": 0.4387, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 9.324628968626714, |
|
"grad_norm": 0.9157505035400391, |
|
"learning_rate": 6.431082549017166e-05, |
|
"loss": 0.4364, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 9.339658087544617, |
|
"grad_norm": 0.9424082040786743, |
|
"learning_rate": 6.41850385001145e-05, |
|
"loss": 0.4456, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 9.354687206462522, |
|
"grad_norm": 0.987912654876709, |
|
"learning_rate": 6.405915379944966e-05, |
|
"loss": 0.4427, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 9.369716325380425, |
|
"grad_norm": 0.9018827676773071, |
|
"learning_rate": 6.393317225530706e-05, |
|
"loss": 0.4545, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 9.384745444298328, |
|
"grad_norm": 0.8961259722709656, |
|
"learning_rate": 6.380709473548361e-05, |
|
"loss": 0.4524, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 9.399774563216232, |
|
"grad_norm": 0.939476728439331, |
|
"learning_rate": 6.368092210843739e-05, |
|
"loss": 0.4465, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 9.414803682134135, |
|
"grad_norm": 0.9325003623962402, |
|
"learning_rate": 6.35546552432816e-05, |
|
"loss": 0.4562, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 9.429832801052038, |
|
"grad_norm": 1.0927010774612427, |
|
"learning_rate": 6.342829500977856e-05, |
|
"loss": 0.4499, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 9.444861919969942, |
|
"grad_norm": 0.9243865013122559, |
|
"learning_rate": 6.330184227833376e-05, |
|
"loss": 0.4469, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 9.459891038887845, |
|
"grad_norm": 0.9676965475082397, |
|
"learning_rate": 6.31752979199898e-05, |
|
"loss": 0.4475, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 9.474920157805748, |
|
"grad_norm": 1.0749905109405518, |
|
"learning_rate": 6.30486628064205e-05, |
|
"loss": 0.4644, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 9.489949276723651, |
|
"grad_norm": 1.0174274444580078, |
|
"learning_rate": 6.292193780992474e-05, |
|
"loss": 0.4657, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 9.504978395641556, |
|
"grad_norm": 0.9137683510780334, |
|
"learning_rate": 6.279512380342065e-05, |
|
"loss": 0.4574, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 9.52000751455946, |
|
"grad_norm": 0.8929033279418945, |
|
"learning_rate": 6.266822166043937e-05, |
|
"loss": 0.4571, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 9.535036633477363, |
|
"grad_norm": 1.0599805116653442, |
|
"learning_rate": 6.254123225511923e-05, |
|
"loss": 0.4606, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 9.550065752395266, |
|
"grad_norm": 1.183914065361023, |
|
"learning_rate": 6.241415646219963e-05, |
|
"loss": 0.459, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 9.56509487131317, |
|
"grad_norm": 1.0352977514266968, |
|
"learning_rate": 6.228699515701501e-05, |
|
"loss": 0.4593, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 9.580123990231073, |
|
"grad_norm": 0.8676705956459045, |
|
"learning_rate": 6.215974921548887e-05, |
|
"loss": 0.4546, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 9.595153109148976, |
|
"grad_norm": 1.03312087059021, |
|
"learning_rate": 6.203241951412767e-05, |
|
"loss": 0.4495, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 9.61018222806688, |
|
"grad_norm": 0.9865357279777527, |
|
"learning_rate": 6.19050069300149e-05, |
|
"loss": 0.4533, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 9.625211346984782, |
|
"grad_norm": 1.0788352489471436, |
|
"learning_rate": 6.177751234080491e-05, |
|
"loss": 0.4515, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 9.640240465902686, |
|
"grad_norm": 1.049320936203003, |
|
"learning_rate": 6.164993662471692e-05, |
|
"loss": 0.4568, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 9.65526958482059, |
|
"grad_norm": 0.9056411981582642, |
|
"learning_rate": 6.152228066052904e-05, |
|
"loss": 0.4648, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 9.670298703738494, |
|
"grad_norm": 0.9347831010818481, |
|
"learning_rate": 6.139454532757208e-05, |
|
"loss": 0.4622, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 9.685327822656397, |
|
"grad_norm": 0.9340201020240784, |
|
"learning_rate": 6.126673150572362e-05, |
|
"loss": 0.4537, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 9.7003569415743, |
|
"grad_norm": 0.9909615516662598, |
|
"learning_rate": 6.113884007540184e-05, |
|
"loss": 0.4704, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 9.715386060492204, |
|
"grad_norm": 1.0939775705337524, |
|
"learning_rate": 6.1010871917559576e-05, |
|
"loss": 0.4596, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 9.730415179410107, |
|
"grad_norm": 0.9341562986373901, |
|
"learning_rate": 6.088282791367812e-05, |
|
"loss": 0.46, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 9.74544429832801, |
|
"grad_norm": 0.9412760734558105, |
|
"learning_rate": 6.075470894576124e-05, |
|
"loss": 0.4701, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 9.760473417245914, |
|
"grad_norm": 1.0007338523864746, |
|
"learning_rate": 6.062651589632911e-05, |
|
"loss": 0.4652, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 9.775502536163817, |
|
"grad_norm": 1.0357065200805664, |
|
"learning_rate": 6.0498249648412134e-05, |
|
"loss": 0.4684, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 9.79053165508172, |
|
"grad_norm": 0.8514649868011475, |
|
"learning_rate": 6.036991108554497e-05, |
|
"loss": 0.454, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 9.805560773999623, |
|
"grad_norm": 0.9953536987304688, |
|
"learning_rate": 6.02415010917604e-05, |
|
"loss": 0.4579, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 9.820589892917528, |
|
"grad_norm": 0.9308024644851685, |
|
"learning_rate": 6.011302055158324e-05, |
|
"loss": 0.4631, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 9.835619011835432, |
|
"grad_norm": 0.9298855662345886, |
|
"learning_rate": 5.9984470350024256e-05, |
|
"loss": 0.4544, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 9.850648130753335, |
|
"grad_norm": 0.9751214385032654, |
|
"learning_rate": 5.985585137257401e-05, |
|
"loss": 0.4571, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 9.865677249671238, |
|
"grad_norm": 0.9474308490753174, |
|
"learning_rate": 5.9727164505196905e-05, |
|
"loss": 0.4658, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 9.880706368589141, |
|
"grad_norm": 1.0583529472351074, |
|
"learning_rate": 5.95984106343249e-05, |
|
"loss": 0.4561, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 9.895735487507045, |
|
"grad_norm": 1.0418837070465088, |
|
"learning_rate": 5.946959064685156e-05, |
|
"loss": 0.4637, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 9.910764606424948, |
|
"grad_norm": 1.0113483667373657, |
|
"learning_rate": 5.934070543012582e-05, |
|
"loss": 0.4705, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 9.925793725342851, |
|
"grad_norm": 1.046410083770752, |
|
"learning_rate": 5.921175587194601e-05, |
|
"loss": 0.4884, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 9.940822844260754, |
|
"grad_norm": 0.9872678518295288, |
|
"learning_rate": 5.9082742860553576e-05, |
|
"loss": 0.4744, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 9.95585196317866, |
|
"grad_norm": 1.0428500175476074, |
|
"learning_rate": 5.895366728462709e-05, |
|
"loss": 0.4704, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 9.970881082096563, |
|
"grad_norm": 0.922476053237915, |
|
"learning_rate": 5.882453003327612e-05, |
|
"loss": 0.465, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 9.985910201014466, |
|
"grad_norm": 1.03745698928833, |
|
"learning_rate": 5.8695331996034986e-05, |
|
"loss": 0.4674, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 1.6415784358978271, |
|
"learning_rate": 5.8566074062856815e-05, |
|
"loss": 0.4717, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 10.015029118917903, |
|
"grad_norm": 0.9536633491516113, |
|
"learning_rate": 5.8436757124107245e-05, |
|
"loss": 0.361, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 10.030058237835807, |
|
"grad_norm": 0.8403608202934265, |
|
"learning_rate": 5.83073820705584e-05, |
|
"loss": 0.3593, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 10.04508735675371, |
|
"grad_norm": 1.0014981031417847, |
|
"learning_rate": 5.8177949793382705e-05, |
|
"loss": 0.3669, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 10.060116475671613, |
|
"grad_norm": 0.9928374290466309, |
|
"learning_rate": 5.804846118414671e-05, |
|
"loss": 0.3584, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 10.075145594589518, |
|
"grad_norm": 0.9604836106300354, |
|
"learning_rate": 5.7918917134805096e-05, |
|
"loss": 0.3467, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 10.090174713507421, |
|
"grad_norm": 1.0535321235656738, |
|
"learning_rate": 5.7789318537694335e-05, |
|
"loss": 0.3623, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 10.105203832425325, |
|
"grad_norm": 1.0338060855865479, |
|
"learning_rate": 5.76596662855267e-05, |
|
"loss": 0.3504, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 10.120232951343228, |
|
"grad_norm": 0.9590771794319153, |
|
"learning_rate": 5.752996127138404e-05, |
|
"loss": 0.3571, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 10.135262070261131, |
|
"grad_norm": 0.939929187297821, |
|
"learning_rate": 5.740020438871162e-05, |
|
"loss": 0.3709, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 10.150291189179034, |
|
"grad_norm": 1.0055979490280151, |
|
"learning_rate": 5.727039653131202e-05, |
|
"loss": 0.3646, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 10.165320308096938, |
|
"grad_norm": 1.0767991542816162, |
|
"learning_rate": 5.714053859333893e-05, |
|
"loss": 0.3626, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 10.180349427014841, |
|
"grad_norm": 0.9774537682533264, |
|
"learning_rate": 5.701063146929103e-05, |
|
"loss": 0.3691, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 10.195378545932744, |
|
"grad_norm": 1.1948145627975464, |
|
"learning_rate": 5.688067605400579e-05, |
|
"loss": 0.3707, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 10.210407664850647, |
|
"grad_norm": 1.1181336641311646, |
|
"learning_rate": 5.675067324265332e-05, |
|
"loss": 0.3637, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 10.22543678376855, |
|
"grad_norm": 0.9550219774246216, |
|
"learning_rate": 5.662062393073022e-05, |
|
"loss": 0.3625, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 10.240465902686456, |
|
"grad_norm": 0.9461958408355713, |
|
"learning_rate": 5.6490529014053405e-05, |
|
"loss": 0.3719, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 10.255495021604359, |
|
"grad_norm": 0.9581360816955566, |
|
"learning_rate": 5.636038938875391e-05, |
|
"loss": 0.3711, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 10.270524140522262, |
|
"grad_norm": 0.9395859837532043, |
|
"learning_rate": 5.623020595127073e-05, |
|
"loss": 0.3624, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 10.285553259440166, |
|
"grad_norm": 1.146485447883606, |
|
"learning_rate": 5.609997959834471e-05, |
|
"loss": 0.3684, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 10.300582378358069, |
|
"grad_norm": 0.9923917055130005, |
|
"learning_rate": 5.596971122701221e-05, |
|
"loss": 0.3695, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 10.315611497275972, |
|
"grad_norm": 0.9672958850860596, |
|
"learning_rate": 5.583940173459913e-05, |
|
"loss": 0.3735, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 10.330640616193875, |
|
"grad_norm": 0.9627594947814941, |
|
"learning_rate": 5.5709052018714536e-05, |
|
"loss": 0.3585, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 10.345669735111779, |
|
"grad_norm": 1.0451908111572266, |
|
"learning_rate": 5.5578662977244625e-05, |
|
"loss": 0.3726, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 10.360698854029682, |
|
"grad_norm": 1.0388795137405396, |
|
"learning_rate": 5.5448235508346435e-05, |
|
"loss": 0.3778, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 10.375727972947587, |
|
"grad_norm": 0.9968121647834778, |
|
"learning_rate": 5.5317770510441745e-05, |
|
"loss": 0.3837, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 10.39075709186549, |
|
"grad_norm": 1.104638934135437, |
|
"learning_rate": 5.518726888221082e-05, |
|
"loss": 0.3719, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 10.405786210783393, |
|
"grad_norm": 1.006320595741272, |
|
"learning_rate": 5.5056731522586236e-05, |
|
"loss": 0.3664, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 10.420815329701297, |
|
"grad_norm": 1.1039286851882935, |
|
"learning_rate": 5.492615933074673e-05, |
|
"loss": 0.3768, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 10.4358444486192, |
|
"grad_norm": 0.9026983380317688, |
|
"learning_rate": 5.479555320611094e-05, |
|
"loss": 0.3661, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 10.450873567537103, |
|
"grad_norm": 1.0680197477340698, |
|
"learning_rate": 5.466491404833127e-05, |
|
"loss": 0.375, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 10.465902686455006, |
|
"grad_norm": 1.079924464225769, |
|
"learning_rate": 5.4534242757287643e-05, |
|
"loss": 0.3865, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 10.48093180537291, |
|
"grad_norm": 1.037091851234436, |
|
"learning_rate": 5.440354023308134e-05, |
|
"loss": 0.3861, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 10.495960924290813, |
|
"grad_norm": 1.0389127731323242, |
|
"learning_rate": 5.4272807376028777e-05, |
|
"loss": 0.3701, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 10.510990043208716, |
|
"grad_norm": 1.079481840133667, |
|
"learning_rate": 5.41420450866553e-05, |
|
"loss": 0.3775, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 10.52601916212662, |
|
"grad_norm": 1.3485366106033325, |
|
"learning_rate": 5.401125426568904e-05, |
|
"loss": 0.3722, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 10.541048281044525, |
|
"grad_norm": 1.0112107992172241, |
|
"learning_rate": 5.388043581405461e-05, |
|
"loss": 0.3712, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 10.556077399962428, |
|
"grad_norm": 0.9727371335029602, |
|
"learning_rate": 5.374959063286695e-05, |
|
"loss": 0.3732, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 10.571106518880331, |
|
"grad_norm": 0.9836901426315308, |
|
"learning_rate": 5.361871962342518e-05, |
|
"loss": 0.3787, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 10.586135637798234, |
|
"grad_norm": 1.0882790088653564, |
|
"learning_rate": 5.348782368720626e-05, |
|
"loss": 0.3816, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 10.601164756716138, |
|
"grad_norm": 0.9604332447052002, |
|
"learning_rate": 5.335690372585892e-05, |
|
"loss": 0.3765, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 10.61619387563404, |
|
"grad_norm": 0.9835896492004395, |
|
"learning_rate": 5.322596064119731e-05, |
|
"loss": 0.3808, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 10.631222994551944, |
|
"grad_norm": 0.9179807901382446, |
|
"learning_rate": 5.309499533519493e-05, |
|
"loss": 0.378, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 10.646252113469847, |
|
"grad_norm": 1.0876275300979614, |
|
"learning_rate": 5.2964008709978305e-05, |
|
"loss": 0.3752, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 10.66128123238775, |
|
"grad_norm": 0.9817517995834351, |
|
"learning_rate": 5.2833001667820816e-05, |
|
"loss": 0.3856, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 10.676310351305656, |
|
"grad_norm": 1.0658329725265503, |
|
"learning_rate": 5.270197511113649e-05, |
|
"loss": 0.3747, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 10.691339470223559, |
|
"grad_norm": 1.0060932636260986, |
|
"learning_rate": 5.257092994247377e-05, |
|
"loss": 0.3867, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 10.706368589141462, |
|
"grad_norm": 1.1070188283920288, |
|
"learning_rate": 5.243986706450933e-05, |
|
"loss": 0.3765, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 10.721397708059365, |
|
"grad_norm": 0.9768523573875427, |
|
"learning_rate": 5.2308787380041777e-05, |
|
"loss": 0.3852, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 10.736426826977269, |
|
"grad_norm": 0.9963809847831726, |
|
"learning_rate": 5.217769179198555e-05, |
|
"loss": 0.3924, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 10.751455945895172, |
|
"grad_norm": 0.9897161722183228, |
|
"learning_rate": 5.2046581203364586e-05, |
|
"loss": 0.3871, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 10.766485064813075, |
|
"grad_norm": 1.0196555852890015, |
|
"learning_rate": 5.191545651730616e-05, |
|
"loss": 0.3766, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 10.781514183730978, |
|
"grad_norm": 0.8715333342552185, |
|
"learning_rate": 5.1784318637034676e-05, |
|
"loss": 0.3878, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 10.796543302648882, |
|
"grad_norm": 1.0659235715866089, |
|
"learning_rate": 5.165316846586541e-05, |
|
"loss": 0.387, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 10.811572421566785, |
|
"grad_norm": 1.0283163785934448, |
|
"learning_rate": 5.15220069071983e-05, |
|
"loss": 0.3899, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 10.826601540484688, |
|
"grad_norm": 0.972322404384613, |
|
"learning_rate": 5.139083486451172e-05, |
|
"loss": 0.3916, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 10.841630659402593, |
|
"grad_norm": 1.1113601922988892, |
|
"learning_rate": 5.1259653241356276e-05, |
|
"loss": 0.3832, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 10.856659778320497, |
|
"grad_norm": 1.1082892417907715, |
|
"learning_rate": 5.1128462941348554e-05, |
|
"loss": 0.3863, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 10.8716888972384, |
|
"grad_norm": 1.0528475046157837, |
|
"learning_rate": 5.0997264868164903e-05, |
|
"loss": 0.393, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 10.886718016156303, |
|
"grad_norm": 0.9899016618728638, |
|
"learning_rate": 5.0866059925535234e-05, |
|
"loss": 0.39, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 10.901747135074206, |
|
"grad_norm": 1.1150156259536743, |
|
"learning_rate": 5.073484901723676e-05, |
|
"loss": 0.3806, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 10.91677625399211, |
|
"grad_norm": 1.0797758102416992, |
|
"learning_rate": 5.0603633047087817e-05, |
|
"loss": 0.3953, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 10.931805372910013, |
|
"grad_norm": 1.122441291809082, |
|
"learning_rate": 5.047241291894156e-05, |
|
"loss": 0.386, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 10.946834491827916, |
|
"grad_norm": 0.8962685465812683, |
|
"learning_rate": 5.034118953667982e-05, |
|
"loss": 0.3914, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 10.96186361074582, |
|
"grad_norm": 1.1607177257537842, |
|
"learning_rate": 5.020996380420685e-05, |
|
"loss": 0.3995, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 10.976892729663723, |
|
"grad_norm": 1.0731902122497559, |
|
"learning_rate": 5.0078736625443054e-05, |
|
"loss": 0.3836, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 10.991921848581628, |
|
"grad_norm": 1.0019197463989258, |
|
"learning_rate": 4.994750890431884e-05, |
|
"loss": 0.3845, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 11.006011647567162, |
|
"grad_norm": 0.9175123572349548, |
|
"learning_rate": 4.9816281544768326e-05, |
|
"loss": 0.3611, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 11.021040766485065, |
|
"grad_norm": 0.8413906097412109, |
|
"learning_rate": 4.968505545072313e-05, |
|
"loss": 0.3021, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 11.036069885402968, |
|
"grad_norm": 1.0692964792251587, |
|
"learning_rate": 4.955383152610621e-05, |
|
"loss": 0.2892, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 11.051099004320871, |
|
"grad_norm": 1.0013508796691895, |
|
"learning_rate": 4.9422610674825495e-05, |
|
"loss": 0.2979, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 11.066128123238775, |
|
"grad_norm": 1.0104172229766846, |
|
"learning_rate": 4.929139380076783e-05, |
|
"loss": 0.2995, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 11.081157242156678, |
|
"grad_norm": 1.0872989892959595, |
|
"learning_rate": 4.9160181807792586e-05, |
|
"loss": 0.2909, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 11.096186361074581, |
|
"grad_norm": 1.1095547676086426, |
|
"learning_rate": 4.90289755997256e-05, |
|
"loss": 0.29, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 11.111215479992486, |
|
"grad_norm": 1.0950359106063843, |
|
"learning_rate": 4.889777608035273e-05, |
|
"loss": 0.3107, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 11.12624459891039, |
|
"grad_norm": 1.060843586921692, |
|
"learning_rate": 4.876658415341393e-05, |
|
"loss": 0.3128, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 11.141273717828293, |
|
"grad_norm": 1.0450581312179565, |
|
"learning_rate": 4.863540072259668e-05, |
|
"loss": 0.3099, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 11.156302836746196, |
|
"grad_norm": 0.9836236238479614, |
|
"learning_rate": 4.850422669153009e-05, |
|
"loss": 0.3038, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 11.1713319556641, |
|
"grad_norm": 0.9338634610176086, |
|
"learning_rate": 4.837306296377841e-05, |
|
"loss": 0.2983, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 11.186361074582003, |
|
"grad_norm": 0.9969077706336975, |
|
"learning_rate": 4.824191044283498e-05, |
|
"loss": 0.3041, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 11.201390193499906, |
|
"grad_norm": 1.1370275020599365, |
|
"learning_rate": 4.811077003211592e-05, |
|
"loss": 0.3124, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 11.216419312417809, |
|
"grad_norm": 1.122521162033081, |
|
"learning_rate": 4.797964263495394e-05, |
|
"loss": 0.3077, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 11.231448431335712, |
|
"grad_norm": 1.1988801956176758, |
|
"learning_rate": 4.78485291545921e-05, |
|
"loss": 0.3154, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 11.246477550253616, |
|
"grad_norm": 1.1286782026290894, |
|
"learning_rate": 4.771743049417761e-05, |
|
"loss": 0.2994, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 11.26150666917152, |
|
"grad_norm": 1.0577936172485352, |
|
"learning_rate": 4.7586347556755573e-05, |
|
"loss": 0.3036, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 11.276535788089424, |
|
"grad_norm": 1.0209895372390747, |
|
"learning_rate": 4.745528124526282e-05, |
|
"loss": 0.3043, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 11.291564907007327, |
|
"grad_norm": 0.9786052107810974, |
|
"learning_rate": 4.7324232462521634e-05, |
|
"loss": 0.3089, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 11.30659402592523, |
|
"grad_norm": 1.1310527324676514, |
|
"learning_rate": 4.719320211123358e-05, |
|
"loss": 0.3016, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 11.321623144843134, |
|
"grad_norm": 0.9561529755592346, |
|
"learning_rate": 4.706219109397319e-05, |
|
"loss": 0.3154, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 11.336652263761037, |
|
"grad_norm": 0.9974495768547058, |
|
"learning_rate": 4.6931200313181944e-05, |
|
"loss": 0.3208, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 11.35168138267894, |
|
"grad_norm": 0.9916987419128418, |
|
"learning_rate": 4.6800230671161784e-05, |
|
"loss": 0.3069, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 11.366710501596843, |
|
"grad_norm": 1.231939435005188, |
|
"learning_rate": 4.666928307006918e-05, |
|
"loss": 0.3063, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 11.381739620514747, |
|
"grad_norm": 1.0125497579574585, |
|
"learning_rate": 4.6538358411908646e-05, |
|
"loss": 0.318, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 11.39676873943265, |
|
"grad_norm": 1.0557286739349365, |
|
"learning_rate": 4.640745759852677e-05, |
|
"loss": 0.3112, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 11.411797858350555, |
|
"grad_norm": 1.0968514680862427, |
|
"learning_rate": 4.6276581531605824e-05, |
|
"loss": 0.3163, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 11.426826977268458, |
|
"grad_norm": 1.0451496839523315, |
|
"learning_rate": 4.6145731112657644e-05, |
|
"loss": 0.3096, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 11.441856096186362, |
|
"grad_norm": 1.1789813041687012, |
|
"learning_rate": 4.601490724301738e-05, |
|
"loss": 0.3024, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 11.456885215104265, |
|
"grad_norm": 1.1728602647781372, |
|
"learning_rate": 4.5884110823837334e-05, |
|
"loss": 0.3052, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 11.471914334022168, |
|
"grad_norm": 1.032285451889038, |
|
"learning_rate": 4.5753342756080666e-05, |
|
"loss": 0.3108, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 11.486943452940071, |
|
"grad_norm": 1.1014740467071533, |
|
"learning_rate": 4.5622603940515326e-05, |
|
"loss": 0.3049, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 11.501972571857975, |
|
"grad_norm": 1.2548887729644775, |
|
"learning_rate": 4.549189527770767e-05, |
|
"loss": 0.3204, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 11.517001690775878, |
|
"grad_norm": 1.0855730772018433, |
|
"learning_rate": 4.5361217668016446e-05, |
|
"loss": 0.3136, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 11.532030809693781, |
|
"grad_norm": 0.9988487362861633, |
|
"learning_rate": 4.52305720115864e-05, |
|
"loss": 0.3173, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 11.547059928611684, |
|
"grad_norm": 1.1315146684646606, |
|
"learning_rate": 4.509995920834229e-05, |
|
"loss": 0.3138, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 11.56208904752959, |
|
"grad_norm": 0.9927186965942383, |
|
"learning_rate": 4.496938015798246e-05, |
|
"loss": 0.3079, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 11.577118166447493, |
|
"grad_norm": 1.1122972965240479, |
|
"learning_rate": 4.483883575997284e-05, |
|
"loss": 0.3179, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 11.592147285365396, |
|
"grad_norm": 1.007947564125061, |
|
"learning_rate": 4.47083269135406e-05, |
|
"loss": 0.3276, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 11.6071764042833, |
|
"grad_norm": 1.00367271900177, |
|
"learning_rate": 4.4577854517668075e-05, |
|
"loss": 0.3202, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 11.622205523201202, |
|
"grad_norm": 1.1806467771530151, |
|
"learning_rate": 4.4447419471086484e-05, |
|
"loss": 0.3203, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 11.637234642119106, |
|
"grad_norm": 1.2128424644470215, |
|
"learning_rate": 4.431702267226979e-05, |
|
"loss": 0.3188, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 11.652263761037009, |
|
"grad_norm": 1.2076245546340942, |
|
"learning_rate": 4.418666501942848e-05, |
|
"loss": 0.3093, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 11.667292879954912, |
|
"grad_norm": 1.1673307418823242, |
|
"learning_rate": 4.4056347410503414e-05, |
|
"loss": 0.3204, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 11.682321998872816, |
|
"grad_norm": 0.9249235987663269, |
|
"learning_rate": 4.392607074315957e-05, |
|
"loss": 0.3167, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 11.697351117790719, |
|
"grad_norm": 1.0417946577072144, |
|
"learning_rate": 4.379583591477999e-05, |
|
"loss": 0.3157, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 11.712380236708622, |
|
"grad_norm": 1.1642825603485107, |
|
"learning_rate": 4.366564382245943e-05, |
|
"loss": 0.3145, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 11.727409355626527, |
|
"grad_norm": 1.1535450220108032, |
|
"learning_rate": 4.353549536299835e-05, |
|
"loss": 0.3144, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 11.74243847454443, |
|
"grad_norm": 0.992770254611969, |
|
"learning_rate": 4.3405391432896555e-05, |
|
"loss": 0.3084, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 11.757467593462334, |
|
"grad_norm": 1.064002275466919, |
|
"learning_rate": 4.327533292834723e-05, |
|
"loss": 0.3186, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 11.772496712380237, |
|
"grad_norm": 1.1059247255325317, |
|
"learning_rate": 4.314532074523057e-05, |
|
"loss": 0.3233, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 11.78752583129814, |
|
"grad_norm": 1.1188381910324097, |
|
"learning_rate": 4.3015355779107734e-05, |
|
"loss": 0.3361, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 11.802554950216043, |
|
"grad_norm": 1.0294090509414673, |
|
"learning_rate": 4.288543892521463e-05, |
|
"loss": 0.3144, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 11.817584069133947, |
|
"grad_norm": 1.265080451965332, |
|
"learning_rate": 4.275557107845576e-05, |
|
"loss": 0.3171, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 11.83261318805185, |
|
"grad_norm": 1.3412435054779053, |
|
"learning_rate": 4.262575313339803e-05, |
|
"loss": 0.3249, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 11.847642306969753, |
|
"grad_norm": 1.074264407157898, |
|
"learning_rate": 4.249598598426465e-05, |
|
"loss": 0.3241, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 11.862671425887658, |
|
"grad_norm": 1.2046911716461182, |
|
"learning_rate": 4.236627052492889e-05, |
|
"loss": 0.3202, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 11.877700544805561, |
|
"grad_norm": 1.1616815328598022, |
|
"learning_rate": 4.2236607648907984e-05, |
|
"loss": 0.3185, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 11.892729663723465, |
|
"grad_norm": 1.1158292293548584, |
|
"learning_rate": 4.210699824935695e-05, |
|
"loss": 0.3209, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 11.907758782641368, |
|
"grad_norm": 1.0398184061050415, |
|
"learning_rate": 4.197744321906247e-05, |
|
"loss": 0.3124, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 11.922787901559271, |
|
"grad_norm": 1.1969057321548462, |
|
"learning_rate": 4.1847943450436686e-05, |
|
"loss": 0.3432, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 11.937817020477175, |
|
"grad_norm": 1.1535173654556274, |
|
"learning_rate": 4.17184998355111e-05, |
|
"loss": 0.3143, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 11.952846139395078, |
|
"grad_norm": 1.0445293188095093, |
|
"learning_rate": 4.158911326593037e-05, |
|
"loss": 0.3222, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 11.967875258312981, |
|
"grad_norm": 1.1093374490737915, |
|
"learning_rate": 4.14597846329463e-05, |
|
"loss": 0.3311, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 11.982904377230884, |
|
"grad_norm": 1.1024218797683716, |
|
"learning_rate": 4.133051482741149e-05, |
|
"loss": 0.3153, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 11.997933496148788, |
|
"grad_norm": 1.0923748016357422, |
|
"learning_rate": 4.120130473977343e-05, |
|
"loss": 0.3194, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 12.012023295134323, |
|
"grad_norm": 1.1858222484588623, |
|
"learning_rate": 4.107215526006817e-05, |
|
"loss": 0.2696, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 12.027052414052227, |
|
"grad_norm": 0.9616860151290894, |
|
"learning_rate": 4.094306727791436e-05, |
|
"loss": 0.2594, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 12.04208153297013, |
|
"grad_norm": 0.9500885009765625, |
|
"learning_rate": 4.081404168250694e-05, |
|
"loss": 0.2461, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 12.057110651888033, |
|
"grad_norm": 1.0713434219360352, |
|
"learning_rate": 4.0685079362611204e-05, |
|
"loss": 0.2645, |
|
"step": 8030 |
|
}, |
|
{ |
|
"epoch": 12.072139770805936, |
|
"grad_norm": 1.0027638673782349, |
|
"learning_rate": 4.055618120655652e-05, |
|
"loss": 0.2624, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 12.08716888972384, |
|
"grad_norm": 1.0205668210983276, |
|
"learning_rate": 4.0427348102230314e-05, |
|
"loss": 0.2464, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 12.102198008641743, |
|
"grad_norm": 0.970747172832489, |
|
"learning_rate": 4.029858093707189e-05, |
|
"loss": 0.2406, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 12.117227127559646, |
|
"grad_norm": 1.1178600788116455, |
|
"learning_rate": 4.01698805980664e-05, |
|
"loss": 0.2533, |
|
"step": 8070 |
|
}, |
|
{ |
|
"epoch": 12.13225624647755, |
|
"grad_norm": 1.0586788654327393, |
|
"learning_rate": 4.004124797173857e-05, |
|
"loss": 0.2549, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 12.147285365395454, |
|
"grad_norm": 1.0152502059936523, |
|
"learning_rate": 3.991268394414685e-05, |
|
"loss": 0.2499, |
|
"step": 8090 |
|
}, |
|
{ |
|
"epoch": 12.162314484313358, |
|
"grad_norm": 1.0560377836227417, |
|
"learning_rate": 3.9784189400877005e-05, |
|
"loss": 0.2591, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 12.177343603231261, |
|
"grad_norm": 1.1126878261566162, |
|
"learning_rate": 3.965576522703631e-05, |
|
"loss": 0.2593, |
|
"step": 8110 |
|
}, |
|
{ |
|
"epoch": 12.192372722149164, |
|
"grad_norm": 0.9110709428787231, |
|
"learning_rate": 3.9527412307247205e-05, |
|
"loss": 0.2623, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 12.207401841067067, |
|
"grad_norm": 1.153400182723999, |
|
"learning_rate": 3.9399131525641405e-05, |
|
"loss": 0.2598, |
|
"step": 8130 |
|
}, |
|
{ |
|
"epoch": 12.22243095998497, |
|
"grad_norm": 0.8933331966400146, |
|
"learning_rate": 3.927092376585363e-05, |
|
"loss": 0.2529, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 12.237460078902874, |
|
"grad_norm": 1.031607747077942, |
|
"learning_rate": 3.914278991101568e-05, |
|
"loss": 0.2554, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 12.252489197820777, |
|
"grad_norm": 1.1537200212478638, |
|
"learning_rate": 3.901473084375023e-05, |
|
"loss": 0.2474, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 12.26751831673868, |
|
"grad_norm": 1.024788498878479, |
|
"learning_rate": 3.88867474461648e-05, |
|
"loss": 0.2475, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 12.282547435656584, |
|
"grad_norm": 1.087825059890747, |
|
"learning_rate": 3.875884059984571e-05, |
|
"loss": 0.2568, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 12.297576554574489, |
|
"grad_norm": 1.000375509262085, |
|
"learning_rate": 3.863101118585194e-05, |
|
"loss": 0.259, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 12.312605673492392, |
|
"grad_norm": 1.0344016551971436, |
|
"learning_rate": 3.850326008470908e-05, |
|
"loss": 0.2553, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 12.327634792410295, |
|
"grad_norm": 0.9918733835220337, |
|
"learning_rate": 3.8375588176403345e-05, |
|
"loss": 0.2597, |
|
"step": 8210 |
|
}, |
|
{ |
|
"epoch": 12.342663911328199, |
|
"grad_norm": 1.0089991092681885, |
|
"learning_rate": 3.8247996340375344e-05, |
|
"loss": 0.2477, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 12.357693030246102, |
|
"grad_norm": 1.012367606163025, |
|
"learning_rate": 3.812048545551426e-05, |
|
"loss": 0.2585, |
|
"step": 8230 |
|
}, |
|
{ |
|
"epoch": 12.372722149164005, |
|
"grad_norm": 1.1676548719406128, |
|
"learning_rate": 3.799305640015152e-05, |
|
"loss": 0.2534, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 12.387751268081908, |
|
"grad_norm": 1.1742953062057495, |
|
"learning_rate": 3.786571005205498e-05, |
|
"loss": 0.2577, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 12.402780386999812, |
|
"grad_norm": 1.2898715734481812, |
|
"learning_rate": 3.773844728842275e-05, |
|
"loss": 0.2534, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 12.417809505917715, |
|
"grad_norm": 1.093583583831787, |
|
"learning_rate": 3.7611268985877215e-05, |
|
"loss": 0.259, |
|
"step": 8270 |
|
}, |
|
{ |
|
"epoch": 12.432838624835618, |
|
"grad_norm": 0.9623090624809265, |
|
"learning_rate": 3.7484176020458906e-05, |
|
"loss": 0.2647, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 12.447867743753523, |
|
"grad_norm": 1.0669386386871338, |
|
"learning_rate": 3.735716926762059e-05, |
|
"loss": 0.2628, |
|
"step": 8290 |
|
}, |
|
{ |
|
"epoch": 12.462896862671426, |
|
"grad_norm": 1.136635184288025, |
|
"learning_rate": 3.723024960222116e-05, |
|
"loss": 0.264, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 12.47792598158933, |
|
"grad_norm": 1.2198032140731812, |
|
"learning_rate": 3.710341789851962e-05, |
|
"loss": 0.2575, |
|
"step": 8310 |
|
}, |
|
{ |
|
"epoch": 12.492955100507233, |
|
"grad_norm": 1.1004136800765991, |
|
"learning_rate": 3.697667503016904e-05, |
|
"loss": 0.2573, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 12.507984219425136, |
|
"grad_norm": 0.9815653562545776, |
|
"learning_rate": 3.685002187021064e-05, |
|
"loss": 0.2693, |
|
"step": 8330 |
|
}, |
|
{ |
|
"epoch": 12.52301333834304, |
|
"grad_norm": 1.23141348361969, |
|
"learning_rate": 3.6723459291067615e-05, |
|
"loss": 0.2632, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 12.538042457260943, |
|
"grad_norm": 1.0357614755630493, |
|
"learning_rate": 3.65969881645393e-05, |
|
"loss": 0.2582, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 12.553071576178846, |
|
"grad_norm": 1.283329963684082, |
|
"learning_rate": 3.647060936179497e-05, |
|
"loss": 0.2654, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 12.56810069509675, |
|
"grad_norm": 1.062829613685608, |
|
"learning_rate": 3.63443237533681e-05, |
|
"loss": 0.2652, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 12.583129814014653, |
|
"grad_norm": 1.0494091510772705, |
|
"learning_rate": 3.6218132209150045e-05, |
|
"loss": 0.2664, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 12.598158932932558, |
|
"grad_norm": 1.1577351093292236, |
|
"learning_rate": 3.6092035598384354e-05, |
|
"loss": 0.2765, |
|
"step": 8390 |
|
}, |
|
{ |
|
"epoch": 12.61318805185046, |
|
"grad_norm": 1.1229662895202637, |
|
"learning_rate": 3.5966034789660574e-05, |
|
"loss": 0.2658, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 12.628217170768364, |
|
"grad_norm": 1.1747732162475586, |
|
"learning_rate": 3.584013065090837e-05, |
|
"loss": 0.2631, |
|
"step": 8410 |
|
}, |
|
{ |
|
"epoch": 12.643246289686267, |
|
"grad_norm": 1.2156236171722412, |
|
"learning_rate": 3.571432404939149e-05, |
|
"loss": 0.2618, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 12.65827540860417, |
|
"grad_norm": 1.2369886636734009, |
|
"learning_rate": 3.5588615851701855e-05, |
|
"loss": 0.2637, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 12.673304527522074, |
|
"grad_norm": 0.9820154905319214, |
|
"learning_rate": 3.546300692375352e-05, |
|
"loss": 0.2675, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 12.688333646439977, |
|
"grad_norm": 1.0225483179092407, |
|
"learning_rate": 3.533749813077677e-05, |
|
"loss": 0.2634, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 12.70336276535788, |
|
"grad_norm": 0.9450991153717041, |
|
"learning_rate": 3.5212090337312095e-05, |
|
"loss": 0.2713, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 12.718391884275784, |
|
"grad_norm": 1.1000279188156128, |
|
"learning_rate": 3.508678440720431e-05, |
|
"loss": 0.2728, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 12.733421003193687, |
|
"grad_norm": 1.1958969831466675, |
|
"learning_rate": 3.496158120359653e-05, |
|
"loss": 0.2546, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 12.748450122111592, |
|
"grad_norm": 1.0161027908325195, |
|
"learning_rate": 3.483648158892431e-05, |
|
"loss": 0.265, |
|
"step": 8490 |
|
}, |
|
{ |
|
"epoch": 12.763479241029495, |
|
"grad_norm": 1.069886326789856, |
|
"learning_rate": 3.471148642490957e-05, |
|
"loss": 0.2605, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 12.778508359947399, |
|
"grad_norm": 1.082297444343567, |
|
"learning_rate": 3.4586596572554856e-05, |
|
"loss": 0.2739, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 12.793537478865302, |
|
"grad_norm": 1.0885424613952637, |
|
"learning_rate": 3.4461812892137196e-05, |
|
"loss": 0.2708, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 12.808566597783205, |
|
"grad_norm": 1.0391422510147095, |
|
"learning_rate": 3.433713624320234e-05, |
|
"loss": 0.2655, |
|
"step": 8530 |
|
}, |
|
{ |
|
"epoch": 12.823595716701108, |
|
"grad_norm": 1.225851058959961, |
|
"learning_rate": 3.421256748455873e-05, |
|
"loss": 0.2542, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 12.838624835619012, |
|
"grad_norm": 0.993791401386261, |
|
"learning_rate": 3.408810747427169e-05, |
|
"loss": 0.2697, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 12.853653954536915, |
|
"grad_norm": 1.0382951498031616, |
|
"learning_rate": 3.396375706965738e-05, |
|
"loss": 0.2706, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 12.868683073454818, |
|
"grad_norm": 1.0424343347549438, |
|
"learning_rate": 3.383951712727701e-05, |
|
"loss": 0.2755, |
|
"step": 8570 |
|
}, |
|
{ |
|
"epoch": 12.883712192372721, |
|
"grad_norm": 1.1532506942749023, |
|
"learning_rate": 3.371538850293088e-05, |
|
"loss": 0.2628, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 12.898741311290626, |
|
"grad_norm": 1.1272519826889038, |
|
"learning_rate": 3.359137205165251e-05, |
|
"loss": 0.2699, |
|
"step": 8590 |
|
}, |
|
{ |
|
"epoch": 12.91377043020853, |
|
"grad_norm": 1.073285698890686, |
|
"learning_rate": 3.3467468627702734e-05, |
|
"loss": 0.2677, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 12.928799549126433, |
|
"grad_norm": 1.2244044542312622, |
|
"learning_rate": 3.334367908456384e-05, |
|
"loss": 0.2673, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 12.943828668044336, |
|
"grad_norm": 1.1868269443511963, |
|
"learning_rate": 3.32200042749336e-05, |
|
"loss": 0.2671, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 12.95885778696224, |
|
"grad_norm": 1.1779018640518188, |
|
"learning_rate": 3.309644505071959e-05, |
|
"loss": 0.2744, |
|
"step": 8630 |
|
}, |
|
{ |
|
"epoch": 12.973886905880143, |
|
"grad_norm": 1.1692800521850586, |
|
"learning_rate": 3.297300226303306e-05, |
|
"loss": 0.2741, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 12.988916024798046, |
|
"grad_norm": 1.0709041357040405, |
|
"learning_rate": 3.284967676218336e-05, |
|
"loss": 0.2672, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 13.00300582378358, |
|
"grad_norm": 0.9654292464256287, |
|
"learning_rate": 3.272646939767179e-05, |
|
"loss": 0.255, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 13.018034942701485, |
|
"grad_norm": 0.9214917421340942, |
|
"learning_rate": 3.2603381018186016e-05, |
|
"loss": 0.2085, |
|
"step": 8670 |
|
}, |
|
{ |
|
"epoch": 13.033064061619388, |
|
"grad_norm": 0.9971623420715332, |
|
"learning_rate": 3.248041247159401e-05, |
|
"loss": 0.2158, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 13.048093180537292, |
|
"grad_norm": 0.8868154287338257, |
|
"learning_rate": 3.235756460493836e-05, |
|
"loss": 0.2225, |
|
"step": 8690 |
|
}, |
|
{ |
|
"epoch": 13.063122299455195, |
|
"grad_norm": 0.9371384382247925, |
|
"learning_rate": 3.2234838264430346e-05, |
|
"loss": 0.2194, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 13.078151418373098, |
|
"grad_norm": 0.933928370475769, |
|
"learning_rate": 3.211223429544415e-05, |
|
"loss": 0.2087, |
|
"step": 8710 |
|
}, |
|
{ |
|
"epoch": 13.093180537291001, |
|
"grad_norm": 1.1291043758392334, |
|
"learning_rate": 3.198975354251101e-05, |
|
"loss": 0.214, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 13.108209656208905, |
|
"grad_norm": 0.9412780404090881, |
|
"learning_rate": 3.1867396849313466e-05, |
|
"loss": 0.2059, |
|
"step": 8730 |
|
}, |
|
{ |
|
"epoch": 13.123238775126808, |
|
"grad_norm": 0.9674059748649597, |
|
"learning_rate": 3.174516505867943e-05, |
|
"loss": 0.2118, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 13.138267894044711, |
|
"grad_norm": 1.1346533298492432, |
|
"learning_rate": 3.16230590125765e-05, |
|
"loss": 0.2191, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 13.153297012962614, |
|
"grad_norm": 0.9253365993499756, |
|
"learning_rate": 3.150107955210606e-05, |
|
"loss": 0.2137, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 13.16832613188052, |
|
"grad_norm": 1.0744667053222656, |
|
"learning_rate": 3.137922751749762e-05, |
|
"loss": 0.2194, |
|
"step": 8770 |
|
}, |
|
{ |
|
"epoch": 13.183355250798423, |
|
"grad_norm": 0.9793460965156555, |
|
"learning_rate": 3.125750374810283e-05, |
|
"loss": 0.2131, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 13.198384369716326, |
|
"grad_norm": 0.923272430896759, |
|
"learning_rate": 3.113590908238994e-05, |
|
"loss": 0.228, |
|
"step": 8790 |
|
}, |
|
{ |
|
"epoch": 13.21341348863423, |
|
"grad_norm": 1.0247244834899902, |
|
"learning_rate": 3.101444435793777e-05, |
|
"loss": 0.2104, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 13.228442607552132, |
|
"grad_norm": 1.0090657472610474, |
|
"learning_rate": 3.089311041143017e-05, |
|
"loss": 0.2161, |
|
"step": 8810 |
|
}, |
|
{ |
|
"epoch": 13.243471726470036, |
|
"grad_norm": 0.9428199529647827, |
|
"learning_rate": 3.077190807865009e-05, |
|
"loss": 0.2165, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 13.258500845387939, |
|
"grad_norm": 1.083084225654602, |
|
"learning_rate": 3.065083819447393e-05, |
|
"loss": 0.2135, |
|
"step": 8830 |
|
}, |
|
{ |
|
"epoch": 13.273529964305842, |
|
"grad_norm": 1.0958205461502075, |
|
"learning_rate": 3.0529901592865705e-05, |
|
"loss": 0.2128, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 13.288559083223745, |
|
"grad_norm": 0.9356290698051453, |
|
"learning_rate": 3.0409099106871374e-05, |
|
"loss": 0.2136, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 13.303588202141649, |
|
"grad_norm": 1.1614493131637573, |
|
"learning_rate": 3.0288431568613053e-05, |
|
"loss": 0.2256, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 13.318617321059552, |
|
"grad_norm": 1.0191394090652466, |
|
"learning_rate": 3.0167899809283308e-05, |
|
"loss": 0.2183, |
|
"step": 8870 |
|
}, |
|
{ |
|
"epoch": 13.333646439977457, |
|
"grad_norm": 1.0032422542572021, |
|
"learning_rate": 3.0047504659139404e-05, |
|
"loss": 0.214, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 13.34867555889536, |
|
"grad_norm": 0.9819022417068481, |
|
"learning_rate": 2.9927246947497644e-05, |
|
"loss": 0.2169, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 13.363704677813264, |
|
"grad_norm": 1.050058364868164, |
|
"learning_rate": 2.9807127502727537e-05, |
|
"loss": 0.2249, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 13.378733796731167, |
|
"grad_norm": 0.9431155920028687, |
|
"learning_rate": 2.9687147152246276e-05, |
|
"loss": 0.2148, |
|
"step": 8910 |
|
}, |
|
{ |
|
"epoch": 13.39376291564907, |
|
"grad_norm": 0.8861021399497986, |
|
"learning_rate": 2.9567306722512833e-05, |
|
"loss": 0.2202, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 13.408792034566973, |
|
"grad_norm": 1.0134702920913696, |
|
"learning_rate": 2.944760703902244e-05, |
|
"loss": 0.2214, |
|
"step": 8930 |
|
}, |
|
{ |
|
"epoch": 13.423821153484877, |
|
"grad_norm": 1.1062716245651245, |
|
"learning_rate": 2.9328048926300766e-05, |
|
"loss": 0.2238, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 13.43885027240278, |
|
"grad_norm": 1.0837918519973755, |
|
"learning_rate": 2.9208633207898372e-05, |
|
"loss": 0.2142, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 13.453879391320683, |
|
"grad_norm": 1.1653366088867188, |
|
"learning_rate": 2.908936070638487e-05, |
|
"loss": 0.2172, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 13.468908510238588, |
|
"grad_norm": 1.0416685342788696, |
|
"learning_rate": 2.8970232243343482e-05, |
|
"loss": 0.2185, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 13.483937629156491, |
|
"grad_norm": 1.0021854639053345, |
|
"learning_rate": 2.8851248639365114e-05, |
|
"loss": 0.2166, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 13.498966748074395, |
|
"grad_norm": 1.0365519523620605, |
|
"learning_rate": 2.8732410714042957e-05, |
|
"loss": 0.2209, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 13.513995866992298, |
|
"grad_norm": 1.008899211883545, |
|
"learning_rate": 2.8613719285966623e-05, |
|
"loss": 0.2254, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 13.529024985910201, |
|
"grad_norm": 0.8905879855155945, |
|
"learning_rate": 2.8495175172716692e-05, |
|
"loss": 0.2204, |
|
"step": 9010 |
|
}, |
|
{ |
|
"epoch": 13.544054104828104, |
|
"grad_norm": 1.0459271669387817, |
|
"learning_rate": 2.837677919085896e-05, |
|
"loss": 0.217, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 13.559083223746008, |
|
"grad_norm": 1.0746241807937622, |
|
"learning_rate": 2.8258532155938875e-05, |
|
"loss": 0.2154, |
|
"step": 9030 |
|
}, |
|
{ |
|
"epoch": 13.574112342663911, |
|
"grad_norm": 1.0592225790023804, |
|
"learning_rate": 2.8140434882475847e-05, |
|
"loss": 0.2232, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 13.589141461581814, |
|
"grad_norm": 0.9885957837104797, |
|
"learning_rate": 2.802248818395773e-05, |
|
"loss": 0.2158, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 13.604170580499718, |
|
"grad_norm": 1.1569939851760864, |
|
"learning_rate": 2.790469287283517e-05, |
|
"loss": 0.2218, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 13.61919969941762, |
|
"grad_norm": 1.135467529296875, |
|
"learning_rate": 2.7787049760516013e-05, |
|
"loss": 0.2214, |
|
"step": 9070 |
|
}, |
|
{ |
|
"epoch": 13.634228818335526, |
|
"grad_norm": 1.140293002128601, |
|
"learning_rate": 2.766955965735968e-05, |
|
"loss": 0.2174, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 13.649257937253429, |
|
"grad_norm": 1.062946081161499, |
|
"learning_rate": 2.755222337267168e-05, |
|
"loss": 0.2245, |
|
"step": 9090 |
|
}, |
|
{ |
|
"epoch": 13.664287056171332, |
|
"grad_norm": 1.142333984375, |
|
"learning_rate": 2.74350417146979e-05, |
|
"loss": 0.2159, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 13.679316175089236, |
|
"grad_norm": 1.206817388534546, |
|
"learning_rate": 2.731801549061923e-05, |
|
"loss": 0.2213, |
|
"step": 9110 |
|
}, |
|
{ |
|
"epoch": 13.694345294007139, |
|
"grad_norm": 1.0265262126922607, |
|
"learning_rate": 2.7201145506545756e-05, |
|
"loss": 0.2307, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 13.709374412925042, |
|
"grad_norm": 1.2109159231185913, |
|
"learning_rate": 2.7084432567511443e-05, |
|
"loss": 0.2188, |
|
"step": 9130 |
|
}, |
|
{ |
|
"epoch": 13.724403531842945, |
|
"grad_norm": 1.3201031684875488, |
|
"learning_rate": 2.6967877477468397e-05, |
|
"loss": 0.2243, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 13.739432650760849, |
|
"grad_norm": 1.1013463735580444, |
|
"learning_rate": 2.6851481039281478e-05, |
|
"loss": 0.2285, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 13.754461769678752, |
|
"grad_norm": 1.1080180406570435, |
|
"learning_rate": 2.6735244054722697e-05, |
|
"loss": 0.2289, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 13.769490888596657, |
|
"grad_norm": 1.0649311542510986, |
|
"learning_rate": 2.66191673244657e-05, |
|
"loss": 0.2243, |
|
"step": 9170 |
|
}, |
|
{ |
|
"epoch": 13.78452000751456, |
|
"grad_norm": 1.1212127208709717, |
|
"learning_rate": 2.6503251648080212e-05, |
|
"loss": 0.217, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 13.799549126432463, |
|
"grad_norm": 1.0007354021072388, |
|
"learning_rate": 2.6387497824026637e-05, |
|
"loss": 0.2213, |
|
"step": 9190 |
|
}, |
|
{ |
|
"epoch": 13.814578245350367, |
|
"grad_norm": 0.9835550785064697, |
|
"learning_rate": 2.6271906649650457e-05, |
|
"loss": 0.2206, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 13.82960736426827, |
|
"grad_norm": 1.1858932971954346, |
|
"learning_rate": 2.6156478921176807e-05, |
|
"loss": 0.2285, |
|
"step": 9210 |
|
}, |
|
{ |
|
"epoch": 13.844636483186173, |
|
"grad_norm": 1.2049376964569092, |
|
"learning_rate": 2.6041215433704903e-05, |
|
"loss": 0.2236, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 13.859665602104076, |
|
"grad_norm": 0.9520084261894226, |
|
"learning_rate": 2.5926116981202688e-05, |
|
"loss": 0.233, |
|
"step": 9230 |
|
}, |
|
{ |
|
"epoch": 13.87469472102198, |
|
"grad_norm": 1.0784698724746704, |
|
"learning_rate": 2.581118435650121e-05, |
|
"loss": 0.2284, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 13.889723839939883, |
|
"grad_norm": 1.1517982482910156, |
|
"learning_rate": 2.5696418351289387e-05, |
|
"loss": 0.2209, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 13.904752958857786, |
|
"grad_norm": 1.0725606679916382, |
|
"learning_rate": 2.558181975610827e-05, |
|
"loss": 0.2179, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 13.91978207777569, |
|
"grad_norm": 1.0226749181747437, |
|
"learning_rate": 2.546738936034585e-05, |
|
"loss": 0.2247, |
|
"step": 9270 |
|
}, |
|
{ |
|
"epoch": 13.934811196693595, |
|
"grad_norm": 1.1553442478179932, |
|
"learning_rate": 2.5353127952231404e-05, |
|
"loss": 0.2179, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 13.949840315611498, |
|
"grad_norm": 1.0485488176345825, |
|
"learning_rate": 2.5239036318830278e-05, |
|
"loss": 0.2179, |
|
"step": 9290 |
|
}, |
|
{ |
|
"epoch": 13.964869434529401, |
|
"grad_norm": 1.2220666408538818, |
|
"learning_rate": 2.51251152460383e-05, |
|
"loss": 0.2247, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 13.979898553447304, |
|
"grad_norm": 1.1536996364593506, |
|
"learning_rate": 2.5011365518576467e-05, |
|
"loss": 0.2331, |
|
"step": 9310 |
|
}, |
|
{ |
|
"epoch": 13.994927672365208, |
|
"grad_norm": 1.0037457942962646, |
|
"learning_rate": 2.4897787919985454e-05, |
|
"loss": 0.2266, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 14.009017471350742, |
|
"grad_norm": 0.900565505027771, |
|
"learning_rate": 2.4784383232620295e-05, |
|
"loss": 0.1914, |
|
"step": 9330 |
|
}, |
|
{ |
|
"epoch": 14.024046590268645, |
|
"grad_norm": 0.9061153531074524, |
|
"learning_rate": 2.467115223764495e-05, |
|
"loss": 0.1753, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 14.039075709186548, |
|
"grad_norm": 0.8884809613227844, |
|
"learning_rate": 2.4558095715026973e-05, |
|
"loss": 0.1721, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 14.054104828104453, |
|
"grad_norm": 0.9852058291435242, |
|
"learning_rate": 2.4445214443532027e-05, |
|
"loss": 0.1734, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 14.069133947022356, |
|
"grad_norm": 0.8632417321205139, |
|
"learning_rate": 2.4332509200718673e-05, |
|
"loss": 0.1898, |
|
"step": 9370 |
|
}, |
|
{ |
|
"epoch": 14.08416306594026, |
|
"grad_norm": 0.9666391015052795, |
|
"learning_rate": 2.421998076293285e-05, |
|
"loss": 0.1835, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 14.099192184858163, |
|
"grad_norm": 0.8072938919067383, |
|
"learning_rate": 2.4107629905302738e-05, |
|
"loss": 0.1845, |
|
"step": 9390 |
|
}, |
|
{ |
|
"epoch": 14.114221303776066, |
|
"grad_norm": 1.2991918325424194, |
|
"learning_rate": 2.3995457401733158e-05, |
|
"loss": 0.1809, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 14.12925042269397, |
|
"grad_norm": 0.8927931785583496, |
|
"learning_rate": 2.3883464024900482e-05, |
|
"loss": 0.1743, |
|
"step": 9410 |
|
}, |
|
{ |
|
"epoch": 14.144279541611873, |
|
"grad_norm": 0.9115880727767944, |
|
"learning_rate": 2.3771650546247128e-05, |
|
"loss": 0.1742, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 14.159308660529776, |
|
"grad_norm": 0.904136061668396, |
|
"learning_rate": 2.3660017735976374e-05, |
|
"loss": 0.1873, |
|
"step": 9430 |
|
}, |
|
{ |
|
"epoch": 14.17433777944768, |
|
"grad_norm": 0.9878782629966736, |
|
"learning_rate": 2.3548566363046992e-05, |
|
"loss": 0.1839, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 14.189366898365583, |
|
"grad_norm": 1.261094093322754, |
|
"learning_rate": 2.343729719516798e-05, |
|
"loss": 0.1722, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 14.204396017283488, |
|
"grad_norm": 0.959791362285614, |
|
"learning_rate": 2.332621099879318e-05, |
|
"loss": 0.1797, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 14.21942513620139, |
|
"grad_norm": 1.0712839365005493, |
|
"learning_rate": 2.321530853911616e-05, |
|
"loss": 0.1779, |
|
"step": 9470 |
|
}, |
|
{ |
|
"epoch": 14.234454255119294, |
|
"grad_norm": 0.9205087423324585, |
|
"learning_rate": 2.3104590580064823e-05, |
|
"loss": 0.1978, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 14.249483374037197, |
|
"grad_norm": 0.9004307985305786, |
|
"learning_rate": 2.299405788429619e-05, |
|
"loss": 0.1792, |
|
"step": 9490 |
|
}, |
|
{ |
|
"epoch": 14.2645124929551, |
|
"grad_norm": 0.9223144054412842, |
|
"learning_rate": 2.288371121319109e-05, |
|
"loss": 0.1795, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 14.279541611873004, |
|
"grad_norm": 0.8646677732467651, |
|
"learning_rate": 2.2773551326849036e-05, |
|
"loss": 0.1778, |
|
"step": 9510 |
|
}, |
|
{ |
|
"epoch": 14.294570730790907, |
|
"grad_norm": 1.060955286026001, |
|
"learning_rate": 2.266357898408282e-05, |
|
"loss": 0.1864, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 14.30959984970881, |
|
"grad_norm": 0.9104660153388977, |
|
"learning_rate": 2.2553794942413503e-05, |
|
"loss": 0.1825, |
|
"step": 9530 |
|
}, |
|
{ |
|
"epoch": 14.324628968626714, |
|
"grad_norm": 0.945350170135498, |
|
"learning_rate": 2.2444199958064955e-05, |
|
"loss": 0.1836, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 14.339658087544617, |
|
"grad_norm": 1.2413114309310913, |
|
"learning_rate": 2.2334794785958845e-05, |
|
"loss": 0.1769, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 14.354687206462522, |
|
"grad_norm": 0.9645456671714783, |
|
"learning_rate": 2.2225580179709303e-05, |
|
"loss": 0.1845, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 14.369716325380425, |
|
"grad_norm": 0.9362895488739014, |
|
"learning_rate": 2.2116556891617825e-05, |
|
"loss": 0.1813, |
|
"step": 9570 |
|
}, |
|
{ |
|
"epoch": 14.384745444298328, |
|
"grad_norm": 1.0554242134094238, |
|
"learning_rate": 2.200772567266805e-05, |
|
"loss": 0.1932, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 14.399774563216232, |
|
"grad_norm": 1.0449492931365967, |
|
"learning_rate": 2.1899087272520595e-05, |
|
"loss": 0.1882, |
|
"step": 9590 |
|
}, |
|
{ |
|
"epoch": 14.414803682134135, |
|
"grad_norm": 1.107164978981018, |
|
"learning_rate": 2.179064243950784e-05, |
|
"loss": 0.1878, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 14.429832801052038, |
|
"grad_norm": 1.010380506515503, |
|
"learning_rate": 2.1682391920628868e-05, |
|
"loss": 0.1784, |
|
"step": 9610 |
|
}, |
|
{ |
|
"epoch": 14.444861919969942, |
|
"grad_norm": 1.1067860126495361, |
|
"learning_rate": 2.1574336461544258e-05, |
|
"loss": 0.1823, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 14.459891038887845, |
|
"grad_norm": 1.0193742513656616, |
|
"learning_rate": 2.1466476806570972e-05, |
|
"loss": 0.1887, |
|
"step": 9630 |
|
}, |
|
{ |
|
"epoch": 14.474920157805748, |
|
"grad_norm": 0.9946687817573547, |
|
"learning_rate": 2.1358813698677178e-05, |
|
"loss": 0.1956, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 14.489949276723651, |
|
"grad_norm": 1.2227554321289062, |
|
"learning_rate": 2.125134787947722e-05, |
|
"loss": 0.1815, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 14.504978395641556, |
|
"grad_norm": 1.002421259880066, |
|
"learning_rate": 2.114408008922639e-05, |
|
"loss": 0.1851, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 14.52000751455946, |
|
"grad_norm": 1.0360831022262573, |
|
"learning_rate": 2.103701106681602e-05, |
|
"loss": 0.1838, |
|
"step": 9670 |
|
}, |
|
{ |
|
"epoch": 14.535036633477363, |
|
"grad_norm": 0.9968597292900085, |
|
"learning_rate": 2.0930141549768144e-05, |
|
"loss": 0.1842, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 14.550065752395266, |
|
"grad_norm": 1.0610520839691162, |
|
"learning_rate": 2.082347227423064e-05, |
|
"loss": 0.1844, |
|
"step": 9690 |
|
}, |
|
{ |
|
"epoch": 14.56509487131317, |
|
"grad_norm": 0.9733484983444214, |
|
"learning_rate": 2.071700397497199e-05, |
|
"loss": 0.1877, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 14.580123990231073, |
|
"grad_norm": 1.059486746788025, |
|
"learning_rate": 2.061073738537635e-05, |
|
"loss": 0.1917, |
|
"step": 9710 |
|
}, |
|
{ |
|
"epoch": 14.595153109148976, |
|
"grad_norm": 1.0647083520889282, |
|
"learning_rate": 2.0504673237438422e-05, |
|
"loss": 0.1935, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 14.61018222806688, |
|
"grad_norm": 1.005767583847046, |
|
"learning_rate": 2.0398812261758444e-05, |
|
"loss": 0.1868, |
|
"step": 9730 |
|
}, |
|
{ |
|
"epoch": 14.625211346984782, |
|
"grad_norm": 1.0666831731796265, |
|
"learning_rate": 2.029315518753711e-05, |
|
"loss": 0.1863, |
|
"step": 9740 |
|
}, |
|
{ |
|
"epoch": 14.640240465902686, |
|
"grad_norm": 1.0782824754714966, |
|
"learning_rate": 2.018770274257062e-05, |
|
"loss": 0.2028, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 14.65526958482059, |
|
"grad_norm": 0.9997120499610901, |
|
"learning_rate": 2.0082455653245612e-05, |
|
"loss": 0.1945, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 14.670298703738494, |
|
"grad_norm": 1.096117615699768, |
|
"learning_rate": 1.9977414644534205e-05, |
|
"loss": 0.1876, |
|
"step": 9770 |
|
}, |
|
{ |
|
"epoch": 14.685327822656397, |
|
"grad_norm": 0.9982436895370483, |
|
"learning_rate": 1.98725804399889e-05, |
|
"loss": 0.1847, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 14.7003569415743, |
|
"grad_norm": 1.2439534664154053, |
|
"learning_rate": 1.9767953761737772e-05, |
|
"loss": 0.189, |
|
"step": 9790 |
|
}, |
|
{ |
|
"epoch": 14.715386060492204, |
|
"grad_norm": 1.0233805179595947, |
|
"learning_rate": 1.9663535330479305e-05, |
|
"loss": 0.1905, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 14.730415179410107, |
|
"grad_norm": 0.9537500739097595, |
|
"learning_rate": 1.9559325865477573e-05, |
|
"loss": 0.1757, |
|
"step": 9810 |
|
}, |
|
{ |
|
"epoch": 14.74544429832801, |
|
"grad_norm": 1.0633177757263184, |
|
"learning_rate": 1.9455326084557213e-05, |
|
"loss": 0.1926, |
|
"step": 9820 |
|
}, |
|
{ |
|
"epoch": 14.760473417245914, |
|
"grad_norm": 0.9927921295166016, |
|
"learning_rate": 1.9351536704098527e-05, |
|
"loss": 0.1907, |
|
"step": 9830 |
|
}, |
|
{ |
|
"epoch": 14.775502536163817, |
|
"grad_norm": 1.0007320642471313, |
|
"learning_rate": 1.9247958439032448e-05, |
|
"loss": 0.189, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 14.79053165508172, |
|
"grad_norm": 1.1696594953536987, |
|
"learning_rate": 1.9144592002835756e-05, |
|
"loss": 0.1894, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 14.805560773999623, |
|
"grad_norm": 4.139706611633301, |
|
"learning_rate": 1.9041438107526056e-05, |
|
"loss": 0.1839, |
|
"step": 9860 |
|
}, |
|
{ |
|
"epoch": 14.820589892917528, |
|
"grad_norm": 0.9341458678245544, |
|
"learning_rate": 1.8938497463656945e-05, |
|
"loss": 0.1991, |
|
"step": 9870 |
|
}, |
|
{ |
|
"epoch": 14.835619011835432, |
|
"grad_norm": 1.1703625917434692, |
|
"learning_rate": 1.8835770780313027e-05, |
|
"loss": 0.1837, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 14.850648130753335, |
|
"grad_norm": 0.9725760221481323, |
|
"learning_rate": 1.8733258765105126e-05, |
|
"loss": 0.1831, |
|
"step": 9890 |
|
}, |
|
{ |
|
"epoch": 14.865677249671238, |
|
"grad_norm": 0.9153964519500732, |
|
"learning_rate": 1.8630962124165375e-05, |
|
"loss": 0.1955, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 14.880706368589141, |
|
"grad_norm": 1.1788238286972046, |
|
"learning_rate": 1.852888156214233e-05, |
|
"loss": 0.1869, |
|
"step": 9910 |
|
}, |
|
{ |
|
"epoch": 14.895735487507045, |
|
"grad_norm": 0.9835808873176575, |
|
"learning_rate": 1.8427017782196127e-05, |
|
"loss": 0.1915, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 14.910764606424948, |
|
"grad_norm": 1.1048306226730347, |
|
"learning_rate": 1.832537148599367e-05, |
|
"loss": 0.1851, |
|
"step": 9930 |
|
}, |
|
{ |
|
"epoch": 14.925793725342851, |
|
"grad_norm": 1.847183108329773, |
|
"learning_rate": 1.8223943373703734e-05, |
|
"loss": 0.1848, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 14.940822844260754, |
|
"grad_norm": 0.9361986517906189, |
|
"learning_rate": 1.8122734143992214e-05, |
|
"loss": 0.1946, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 14.95585196317866, |
|
"grad_norm": 1.007897973060608, |
|
"learning_rate": 1.8021744494017283e-05, |
|
"loss": 0.1917, |
|
"step": 9960 |
|
}, |
|
{ |
|
"epoch": 14.970881082096563, |
|
"grad_norm": 1.0453609228134155, |
|
"learning_rate": 1.7920975119424576e-05, |
|
"loss": 0.1956, |
|
"step": 9970 |
|
}, |
|
{ |
|
"epoch": 14.985910201014466, |
|
"grad_norm": 1.3399736881256104, |
|
"learning_rate": 1.7820426714342374e-05, |
|
"loss": 0.1963, |
|
"step": 9980 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 1.1934865713119507, |
|
"learning_rate": 1.7720099971376907e-05, |
|
"loss": 0.192, |
|
"step": 9990 |
|
}, |
|
{ |
|
"epoch": 15.015029118917903, |
|
"grad_norm": 0.9646713733673096, |
|
"learning_rate": 1.7619995581607516e-05, |
|
"loss": 0.1614, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 15.030058237835807, |
|
"grad_norm": 0.815608561038971, |
|
"learning_rate": 1.7520114234581912e-05, |
|
"loss": 0.1628, |
|
"step": 10010 |
|
}, |
|
{ |
|
"epoch": 15.04508735675371, |
|
"grad_norm": 0.9114384055137634, |
|
"learning_rate": 1.7420456618311405e-05, |
|
"loss": 0.1567, |
|
"step": 10020 |
|
}, |
|
{ |
|
"epoch": 15.060116475671613, |
|
"grad_norm": 0.9106918573379517, |
|
"learning_rate": 1.7321023419266193e-05, |
|
"loss": 0.1582, |
|
"step": 10030 |
|
}, |
|
{ |
|
"epoch": 15.075145594589518, |
|
"grad_norm": 0.7602341771125793, |
|
"learning_rate": 1.7221815322370632e-05, |
|
"loss": 0.1563, |
|
"step": 10040 |
|
}, |
|
{ |
|
"epoch": 15.090174713507421, |
|
"grad_norm": 0.7736881971359253, |
|
"learning_rate": 1.7122833010998535e-05, |
|
"loss": 0.1533, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 15.105203832425325, |
|
"grad_norm": 0.9630312919616699, |
|
"learning_rate": 1.702407716696836e-05, |
|
"loss": 0.1533, |
|
"step": 10060 |
|
}, |
|
{ |
|
"epoch": 15.120232951343228, |
|
"grad_norm": 0.8553804755210876, |
|
"learning_rate": 1.6925548470538695e-05, |
|
"loss": 0.1629, |
|
"step": 10070 |
|
}, |
|
{ |
|
"epoch": 15.135262070261131, |
|
"grad_norm": 1.0749071836471558, |
|
"learning_rate": 1.6827247600403366e-05, |
|
"loss": 0.1605, |
|
"step": 10080 |
|
}, |
|
{ |
|
"epoch": 15.150291189179034, |
|
"grad_norm": 0.8994390964508057, |
|
"learning_rate": 1.6729175233686955e-05, |
|
"loss": 0.1506, |
|
"step": 10090 |
|
}, |
|
{ |
|
"epoch": 15.165320308096938, |
|
"grad_norm": 1.0106632709503174, |
|
"learning_rate": 1.6631332045939996e-05, |
|
"loss": 0.1652, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 15.180349427014841, |
|
"grad_norm": 1.0532327890396118, |
|
"learning_rate": 1.6533718711134412e-05, |
|
"loss": 0.1603, |
|
"step": 10110 |
|
}, |
|
{ |
|
"epoch": 15.195378545932744, |
|
"grad_norm": 0.821412205696106, |
|
"learning_rate": 1.6436335901658766e-05, |
|
"loss": 0.1511, |
|
"step": 10120 |
|
}, |
|
{ |
|
"epoch": 15.210407664850647, |
|
"grad_norm": 0.8959778547286987, |
|
"learning_rate": 1.633918428831377e-05, |
|
"loss": 0.1609, |
|
"step": 10130 |
|
}, |
|
{ |
|
"epoch": 15.22543678376855, |
|
"grad_norm": 0.8607751131057739, |
|
"learning_rate": 1.6242264540307552e-05, |
|
"loss": 0.1579, |
|
"step": 10140 |
|
}, |
|
{ |
|
"epoch": 15.240465902686456, |
|
"grad_norm": 0.8581548929214478, |
|
"learning_rate": 1.614557732525111e-05, |
|
"loss": 0.1563, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 15.255495021604359, |
|
"grad_norm": 0.8387672901153564, |
|
"learning_rate": 1.604912330915364e-05, |
|
"loss": 0.1576, |
|
"step": 10160 |
|
}, |
|
{ |
|
"epoch": 15.270524140522262, |
|
"grad_norm": 0.871376097202301, |
|
"learning_rate": 1.595290315641806e-05, |
|
"loss": 0.1621, |
|
"step": 10170 |
|
}, |
|
{ |
|
"epoch": 15.285553259440166, |
|
"grad_norm": 1.072432279586792, |
|
"learning_rate": 1.585691752983629e-05, |
|
"loss": 0.153, |
|
"step": 10180 |
|
}, |
|
{ |
|
"epoch": 15.300582378358069, |
|
"grad_norm": 0.9539718627929688, |
|
"learning_rate": 1.5761167090584882e-05, |
|
"loss": 0.1551, |
|
"step": 10190 |
|
}, |
|
{ |
|
"epoch": 15.315611497275972, |
|
"grad_norm": 0.9477748274803162, |
|
"learning_rate": 1.5665652498220236e-05, |
|
"loss": 0.1596, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 15.330640616193875, |
|
"grad_norm": 1.0767313241958618, |
|
"learning_rate": 1.5570374410674243e-05, |
|
"loss": 0.1597, |
|
"step": 10210 |
|
}, |
|
{ |
|
"epoch": 15.345669735111779, |
|
"grad_norm": 0.8535225987434387, |
|
"learning_rate": 1.547533348424963e-05, |
|
"loss": 0.1653, |
|
"step": 10220 |
|
}, |
|
{ |
|
"epoch": 15.360698854029682, |
|
"grad_norm": 0.92160964012146, |
|
"learning_rate": 1.5380530373615542e-05, |
|
"loss": 0.1487, |
|
"step": 10230 |
|
}, |
|
{ |
|
"epoch": 15.375727972947587, |
|
"grad_norm": 0.840239942073822, |
|
"learning_rate": 1.5285965731802944e-05, |
|
"loss": 0.1545, |
|
"step": 10240 |
|
}, |
|
{ |
|
"epoch": 15.39075709186549, |
|
"grad_norm": 1.0626702308654785, |
|
"learning_rate": 1.5191640210200187e-05, |
|
"loss": 0.1559, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 15.405786210783393, |
|
"grad_norm": 0.9364585280418396, |
|
"learning_rate": 1.5097554458548452e-05, |
|
"loss": 0.1646, |
|
"step": 10260 |
|
}, |
|
{ |
|
"epoch": 15.420815329701297, |
|
"grad_norm": 1.0330567359924316, |
|
"learning_rate": 1.5003709124937354e-05, |
|
"loss": 0.1625, |
|
"step": 10270 |
|
}, |
|
{ |
|
"epoch": 15.4358444486192, |
|
"grad_norm": 0.9339507818222046, |
|
"learning_rate": 1.4910104855800427e-05, |
|
"loss": 0.1515, |
|
"step": 10280 |
|
}, |
|
{ |
|
"epoch": 15.450873567537103, |
|
"grad_norm": 0.7912824153900146, |
|
"learning_rate": 1.4816742295910708e-05, |
|
"loss": 0.162, |
|
"step": 10290 |
|
}, |
|
{ |
|
"epoch": 15.465902686455006, |
|
"grad_norm": 0.9348452687263489, |
|
"learning_rate": 1.4723622088376205e-05, |
|
"loss": 0.1572, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 15.48093180537291, |
|
"grad_norm": 0.8750469088554382, |
|
"learning_rate": 1.463074487463561e-05, |
|
"loss": 0.1485, |
|
"step": 10310 |
|
}, |
|
{ |
|
"epoch": 15.495960924290813, |
|
"grad_norm": 0.9709532260894775, |
|
"learning_rate": 1.4538111294453732e-05, |
|
"loss": 0.1583, |
|
"step": 10320 |
|
}, |
|
{ |
|
"epoch": 15.510990043208716, |
|
"grad_norm": 0.9631896018981934, |
|
"learning_rate": 1.4445721985917254e-05, |
|
"loss": 0.1606, |
|
"step": 10330 |
|
}, |
|
{ |
|
"epoch": 15.52601916212662, |
|
"grad_norm": 0.8176620006561279, |
|
"learning_rate": 1.435357758543015e-05, |
|
"loss": 0.1583, |
|
"step": 10340 |
|
}, |
|
{ |
|
"epoch": 15.541048281044525, |
|
"grad_norm": 0.8556742668151855, |
|
"learning_rate": 1.426167872770947e-05, |
|
"loss": 0.1593, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 15.556077399962428, |
|
"grad_norm": 1.2856311798095703, |
|
"learning_rate": 1.4170026045780832e-05, |
|
"loss": 0.169, |
|
"step": 10360 |
|
}, |
|
{ |
|
"epoch": 15.571106518880331, |
|
"grad_norm": 1.07082200050354, |
|
"learning_rate": 1.4078620170974177e-05, |
|
"loss": 0.1581, |
|
"step": 10370 |
|
}, |
|
{ |
|
"epoch": 15.586135637798234, |
|
"grad_norm": 0.9026190042495728, |
|
"learning_rate": 1.3987461732919343e-05, |
|
"loss": 0.1704, |
|
"step": 10380 |
|
}, |
|
{ |
|
"epoch": 15.601164756716138, |
|
"grad_norm": 0.9147086143493652, |
|
"learning_rate": 1.3896551359541782e-05, |
|
"loss": 0.1566, |
|
"step": 10390 |
|
}, |
|
{ |
|
"epoch": 15.61619387563404, |
|
"grad_norm": 0.9676672220230103, |
|
"learning_rate": 1.3805889677058149e-05, |
|
"loss": 0.1668, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 15.631222994551944, |
|
"grad_norm": 0.9647960066795349, |
|
"learning_rate": 1.3715477309972086e-05, |
|
"loss": 0.1603, |
|
"step": 10410 |
|
}, |
|
{ |
|
"epoch": 15.646252113469847, |
|
"grad_norm": 0.9588443636894226, |
|
"learning_rate": 1.3625314881069873e-05, |
|
"loss": 0.1614, |
|
"step": 10420 |
|
}, |
|
{ |
|
"epoch": 15.66128123238775, |
|
"grad_norm": 0.921419084072113, |
|
"learning_rate": 1.3535403011416158e-05, |
|
"loss": 0.1574, |
|
"step": 10430 |
|
}, |
|
{ |
|
"epoch": 15.676310351305656, |
|
"grad_norm": 0.9163838624954224, |
|
"learning_rate": 1.3445742320349625e-05, |
|
"loss": 0.1521, |
|
"step": 10440 |
|
}, |
|
{ |
|
"epoch": 15.691339470223559, |
|
"grad_norm": 0.9288631081581116, |
|
"learning_rate": 1.3356333425478817e-05, |
|
"loss": 0.159, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 15.706368589141462, |
|
"grad_norm": 0.9103051424026489, |
|
"learning_rate": 1.3267176942677761e-05, |
|
"loss": 0.1648, |
|
"step": 10460 |
|
}, |
|
{ |
|
"epoch": 15.721397708059365, |
|
"grad_norm": 0.8684786558151245, |
|
"learning_rate": 1.317827348608191e-05, |
|
"loss": 0.1598, |
|
"step": 10470 |
|
}, |
|
{ |
|
"epoch": 15.736426826977269, |
|
"grad_norm": 1.129595160484314, |
|
"learning_rate": 1.3089623668083683e-05, |
|
"loss": 0.1595, |
|
"step": 10480 |
|
}, |
|
{ |
|
"epoch": 15.751455945895172, |
|
"grad_norm": 0.8634871244430542, |
|
"learning_rate": 1.3001228099328443e-05, |
|
"loss": 0.1642, |
|
"step": 10490 |
|
}, |
|
{ |
|
"epoch": 15.766485064813075, |
|
"grad_norm": 0.932549774646759, |
|
"learning_rate": 1.2913087388710165e-05, |
|
"loss": 0.1541, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 15.781514183730978, |
|
"grad_norm": 0.9329362511634827, |
|
"learning_rate": 1.282520214336731e-05, |
|
"loss": 0.1523, |
|
"step": 10510 |
|
}, |
|
{ |
|
"epoch": 15.796543302648882, |
|
"grad_norm": 0.9856179356575012, |
|
"learning_rate": 1.2737572968678623e-05, |
|
"loss": 0.1597, |
|
"step": 10520 |
|
}, |
|
{ |
|
"epoch": 15.811572421566785, |
|
"grad_norm": 0.9236768484115601, |
|
"learning_rate": 1.2650200468258966e-05, |
|
"loss": 0.161, |
|
"step": 10530 |
|
}, |
|
{ |
|
"epoch": 15.826601540484688, |
|
"grad_norm": 1.0709694623947144, |
|
"learning_rate": 1.256308524395512e-05, |
|
"loss": 0.1641, |
|
"step": 10540 |
|
}, |
|
{ |
|
"epoch": 15.841630659402593, |
|
"grad_norm": 0.8838292956352234, |
|
"learning_rate": 1.2476227895841713e-05, |
|
"loss": 0.1683, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 15.856659778320497, |
|
"grad_norm": 1.0665549039840698, |
|
"learning_rate": 1.238962902221703e-05, |
|
"loss": 0.165, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 15.8716888972384, |
|
"grad_norm": 0.876946210861206, |
|
"learning_rate": 1.2303289219598934e-05, |
|
"loss": 0.1645, |
|
"step": 10570 |
|
}, |
|
{ |
|
"epoch": 15.886718016156303, |
|
"grad_norm": 0.8602812886238098, |
|
"learning_rate": 1.2217209082720677e-05, |
|
"loss": 0.1648, |
|
"step": 10580 |
|
}, |
|
{ |
|
"epoch": 15.901747135074206, |
|
"grad_norm": 0.9444336295127869, |
|
"learning_rate": 1.2131389204526927e-05, |
|
"loss": 0.1531, |
|
"step": 10590 |
|
}, |
|
{ |
|
"epoch": 15.91677625399211, |
|
"grad_norm": 0.8952954411506653, |
|
"learning_rate": 1.2045830176169542e-05, |
|
"loss": 0.1653, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 15.931805372910013, |
|
"grad_norm": 0.9685820937156677, |
|
"learning_rate": 1.1960532587003664e-05, |
|
"loss": 0.1683, |
|
"step": 10610 |
|
}, |
|
{ |
|
"epoch": 15.946834491827916, |
|
"grad_norm": 0.9807755351066589, |
|
"learning_rate": 1.1875497024583476e-05, |
|
"loss": 0.1588, |
|
"step": 10620 |
|
}, |
|
{ |
|
"epoch": 15.96186361074582, |
|
"grad_norm": 0.986831784248352, |
|
"learning_rate": 1.1790724074658315e-05, |
|
"loss": 0.1734, |
|
"step": 10630 |
|
}, |
|
{ |
|
"epoch": 15.976892729663723, |
|
"grad_norm": 0.932146430015564, |
|
"learning_rate": 1.1706214321168513e-05, |
|
"loss": 0.1581, |
|
"step": 10640 |
|
}, |
|
{ |
|
"epoch": 15.991921848581628, |
|
"grad_norm": 0.9639928936958313, |
|
"learning_rate": 1.1621968346241457e-05, |
|
"loss": 0.1595, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 16.00601164756716, |
|
"grad_norm": 0.7162834405899048, |
|
"learning_rate": 1.1537986730187566e-05, |
|
"loss": 0.1529, |
|
"step": 10660 |
|
}, |
|
{ |
|
"epoch": 16.021040766485065, |
|
"grad_norm": 0.9273526072502136, |
|
"learning_rate": 1.1454270051496264e-05, |
|
"loss": 0.1424, |
|
"step": 10670 |
|
}, |
|
{ |
|
"epoch": 16.03606988540297, |
|
"grad_norm": 0.7194620370864868, |
|
"learning_rate": 1.1370818886831985e-05, |
|
"loss": 0.147, |
|
"step": 10680 |
|
}, |
|
{ |
|
"epoch": 16.05109900432087, |
|
"grad_norm": 0.8820509910583496, |
|
"learning_rate": 1.1287633811030268e-05, |
|
"loss": 0.1394, |
|
"step": 10690 |
|
}, |
|
{ |
|
"epoch": 16.066128123238776, |
|
"grad_norm": 0.9373833537101746, |
|
"learning_rate": 1.1204715397093734e-05, |
|
"loss": 0.1347, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 16.081157242156678, |
|
"grad_norm": 0.7921836376190186, |
|
"learning_rate": 1.1122064216188183e-05, |
|
"loss": 0.1368, |
|
"step": 10710 |
|
}, |
|
{ |
|
"epoch": 16.096186361074583, |
|
"grad_norm": 0.7020202875137329, |
|
"learning_rate": 1.1039680837638594e-05, |
|
"loss": 0.1403, |
|
"step": 10720 |
|
}, |
|
{ |
|
"epoch": 16.111215479992484, |
|
"grad_norm": 0.7879025340080261, |
|
"learning_rate": 1.0957565828925293e-05, |
|
"loss": 0.1319, |
|
"step": 10730 |
|
}, |
|
{ |
|
"epoch": 16.12624459891039, |
|
"grad_norm": 0.7713704705238342, |
|
"learning_rate": 1.0875719755679936e-05, |
|
"loss": 0.1335, |
|
"step": 10740 |
|
}, |
|
{ |
|
"epoch": 16.14127371782829, |
|
"grad_norm": 0.8271151185035706, |
|
"learning_rate": 1.0794143181681782e-05, |
|
"loss": 0.1357, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 16.156302836746196, |
|
"grad_norm": 0.7664535641670227, |
|
"learning_rate": 1.0712836668853582e-05, |
|
"loss": 0.137, |
|
"step": 10760 |
|
}, |
|
{ |
|
"epoch": 16.171331955664098, |
|
"grad_norm": 0.8511399626731873, |
|
"learning_rate": 1.063180077725791e-05, |
|
"loss": 0.151, |
|
"step": 10770 |
|
}, |
|
{ |
|
"epoch": 16.186361074582003, |
|
"grad_norm": 0.8683989644050598, |
|
"learning_rate": 1.0551036065093172e-05, |
|
"loss": 0.1416, |
|
"step": 10780 |
|
}, |
|
{ |
|
"epoch": 16.201390193499908, |
|
"grad_norm": 0.8145375847816467, |
|
"learning_rate": 1.0470543088689855e-05, |
|
"loss": 0.1364, |
|
"step": 10790 |
|
}, |
|
{ |
|
"epoch": 16.21641931241781, |
|
"grad_norm": 0.9890855550765991, |
|
"learning_rate": 1.0390322402506619e-05, |
|
"loss": 0.1312, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 16.231448431335714, |
|
"grad_norm": 0.7960677742958069, |
|
"learning_rate": 1.0310374559126551e-05, |
|
"loss": 0.1259, |
|
"step": 10810 |
|
}, |
|
{ |
|
"epoch": 16.246477550253616, |
|
"grad_norm": 0.7810579538345337, |
|
"learning_rate": 1.0230700109253256e-05, |
|
"loss": 0.1476, |
|
"step": 10820 |
|
}, |
|
{ |
|
"epoch": 16.26150666917152, |
|
"grad_norm": 0.7869362235069275, |
|
"learning_rate": 1.0151299601707187e-05, |
|
"loss": 0.1326, |
|
"step": 10830 |
|
}, |
|
{ |
|
"epoch": 16.276535788089422, |
|
"grad_norm": 0.7896257042884827, |
|
"learning_rate": 1.0072173583421769e-05, |
|
"loss": 0.1414, |
|
"step": 10840 |
|
}, |
|
{ |
|
"epoch": 16.291564907007327, |
|
"grad_norm": 0.8226996660232544, |
|
"learning_rate": 9.993322599439692e-06, |
|
"loss": 0.1437, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 16.30659402592523, |
|
"grad_norm": 0.8732724785804749, |
|
"learning_rate": 9.914747192909096e-06, |
|
"loss": 0.1286, |
|
"step": 10860 |
|
}, |
|
{ |
|
"epoch": 16.321623144843134, |
|
"grad_norm": 0.8967133164405823, |
|
"learning_rate": 9.836447905079905e-06, |
|
"loss": 0.1476, |
|
"step": 10870 |
|
}, |
|
{ |
|
"epoch": 16.33665226376104, |
|
"grad_norm": 0.8874047994613647, |
|
"learning_rate": 9.758425275299999e-06, |
|
"loss": 0.1301, |
|
"step": 10880 |
|
}, |
|
{ |
|
"epoch": 16.35168138267894, |
|
"grad_norm": 0.7454355359077454, |
|
"learning_rate": 9.680679841011652e-06, |
|
"loss": 0.1466, |
|
"step": 10890 |
|
}, |
|
{ |
|
"epoch": 16.366710501596845, |
|
"grad_norm": 0.9600047469139099, |
|
"learning_rate": 9.603212137747641e-06, |
|
"loss": 0.1384, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 16.381739620514747, |
|
"grad_norm": 1.0687470436096191, |
|
"learning_rate": 9.526022699127718e-06, |
|
"loss": 0.1337, |
|
"step": 10910 |
|
}, |
|
{ |
|
"epoch": 16.396768739432652, |
|
"grad_norm": 0.7660526633262634, |
|
"learning_rate": 9.449112056854813e-06, |
|
"loss": 0.1372, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 16.411797858350553, |
|
"grad_norm": 0.7811424136161804, |
|
"learning_rate": 9.372480740711475e-06, |
|
"loss": 0.1368, |
|
"step": 10930 |
|
}, |
|
{ |
|
"epoch": 16.42682697726846, |
|
"grad_norm": 0.9468358159065247, |
|
"learning_rate": 9.296129278556155e-06, |
|
"loss": 0.1399, |
|
"step": 10940 |
|
}, |
|
{ |
|
"epoch": 16.44185609618636, |
|
"grad_norm": 0.799017071723938, |
|
"learning_rate": 9.220058196319598e-06, |
|
"loss": 0.1439, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 16.456885215104265, |
|
"grad_norm": 0.811414361000061, |
|
"learning_rate": 9.144268018001184e-06, |
|
"loss": 0.1445, |
|
"step": 10960 |
|
}, |
|
{ |
|
"epoch": 16.471914334022166, |
|
"grad_norm": 0.8114548325538635, |
|
"learning_rate": 9.068759265665384e-06, |
|
"loss": 0.1478, |
|
"step": 10970 |
|
}, |
|
{ |
|
"epoch": 16.48694345294007, |
|
"grad_norm": 0.753917932510376, |
|
"learning_rate": 8.993532459438098e-06, |
|
"loss": 0.1432, |
|
"step": 10980 |
|
}, |
|
{ |
|
"epoch": 16.501972571857976, |
|
"grad_norm": 0.8858105540275574, |
|
"learning_rate": 8.91858811750313e-06, |
|
"loss": 0.1367, |
|
"step": 10990 |
|
}, |
|
{ |
|
"epoch": 16.517001690775878, |
|
"grad_norm": 0.7127811312675476, |
|
"learning_rate": 8.843926756098547e-06, |
|
"loss": 0.1342, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 16.532030809693783, |
|
"grad_norm": 0.8266831636428833, |
|
"learning_rate": 8.769548889513212e-06, |
|
"loss": 0.1492, |
|
"step": 11010 |
|
}, |
|
{ |
|
"epoch": 16.547059928611684, |
|
"grad_norm": 0.9057301878929138, |
|
"learning_rate": 8.695455030083144e-06, |
|
"loss": 0.1474, |
|
"step": 11020 |
|
}, |
|
{ |
|
"epoch": 16.56208904752959, |
|
"grad_norm": 0.7918298840522766, |
|
"learning_rate": 8.621645688188085e-06, |
|
"loss": 0.1388, |
|
"step": 11030 |
|
}, |
|
{ |
|
"epoch": 16.57711816644749, |
|
"grad_norm": 0.8264976739883423, |
|
"learning_rate": 8.548121372247918e-06, |
|
"loss": 0.1449, |
|
"step": 11040 |
|
}, |
|
{ |
|
"epoch": 16.592147285365396, |
|
"grad_norm": 0.9591594934463501, |
|
"learning_rate": 8.474882588719196e-06, |
|
"loss": 0.1436, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 16.607176404283297, |
|
"grad_norm": 0.8288829326629639, |
|
"learning_rate": 8.401929842091616e-06, |
|
"loss": 0.1291, |
|
"step": 11060 |
|
}, |
|
{ |
|
"epoch": 16.622205523201202, |
|
"grad_norm": 0.865283191204071, |
|
"learning_rate": 8.329263634884598e-06, |
|
"loss": 0.1443, |
|
"step": 11070 |
|
}, |
|
{ |
|
"epoch": 16.637234642119104, |
|
"grad_norm": 0.8038478493690491, |
|
"learning_rate": 8.256884467643788e-06, |
|
"loss": 0.1409, |
|
"step": 11080 |
|
}, |
|
{ |
|
"epoch": 16.65226376103701, |
|
"grad_norm": 0.7755337357521057, |
|
"learning_rate": 8.184792838937633e-06, |
|
"loss": 0.1378, |
|
"step": 11090 |
|
}, |
|
{ |
|
"epoch": 16.667292879954914, |
|
"grad_norm": 0.7843419313430786, |
|
"learning_rate": 8.112989245353896e-06, |
|
"loss": 0.1532, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 16.682321998872816, |
|
"grad_norm": 0.7573866248130798, |
|
"learning_rate": 8.0414741814963e-06, |
|
"loss": 0.1451, |
|
"step": 11110 |
|
}, |
|
{ |
|
"epoch": 16.69735111779072, |
|
"grad_norm": 0.8233633637428284, |
|
"learning_rate": 7.97024813998109e-06, |
|
"loss": 0.1364, |
|
"step": 11120 |
|
}, |
|
{ |
|
"epoch": 16.712380236708622, |
|
"grad_norm": 0.8834894895553589, |
|
"learning_rate": 7.899311611433646e-06, |
|
"loss": 0.1431, |
|
"step": 11130 |
|
}, |
|
{ |
|
"epoch": 16.727409355626527, |
|
"grad_norm": 0.8282538056373596, |
|
"learning_rate": 7.828665084485076e-06, |
|
"loss": 0.1316, |
|
"step": 11140 |
|
}, |
|
{ |
|
"epoch": 16.74243847454443, |
|
"grad_norm": 0.7527298927307129, |
|
"learning_rate": 7.758309045768908e-06, |
|
"loss": 0.1465, |
|
"step": 11150 |
|
}, |
|
{ |
|
"epoch": 16.757467593462334, |
|
"grad_norm": 0.7522730827331543, |
|
"learning_rate": 7.688243979917664e-06, |
|
"loss": 0.1386, |
|
"step": 11160 |
|
}, |
|
{ |
|
"epoch": 16.772496712380235, |
|
"grad_norm": 0.949739933013916, |
|
"learning_rate": 7.6184703695595936e-06, |
|
"loss": 0.1317, |
|
"step": 11170 |
|
}, |
|
{ |
|
"epoch": 16.78752583129814, |
|
"grad_norm": 0.8552820086479187, |
|
"learning_rate": 7.5489886953153125e-06, |
|
"loss": 0.1313, |
|
"step": 11180 |
|
}, |
|
{ |
|
"epoch": 16.802554950216045, |
|
"grad_norm": 0.7522038817405701, |
|
"learning_rate": 7.479799435794499e-06, |
|
"loss": 0.1399, |
|
"step": 11190 |
|
}, |
|
{ |
|
"epoch": 16.817584069133947, |
|
"grad_norm": 0.8218302726745605, |
|
"learning_rate": 7.410903067592562e-06, |
|
"loss": 0.139, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 16.83261318805185, |
|
"grad_norm": 0.7487614154815674, |
|
"learning_rate": 7.342300065287439e-06, |
|
"loss": 0.1462, |
|
"step": 11210 |
|
}, |
|
{ |
|
"epoch": 16.847642306969753, |
|
"grad_norm": 0.8830420970916748, |
|
"learning_rate": 7.273990901436245e-06, |
|
"loss": 0.1466, |
|
"step": 11220 |
|
}, |
|
{ |
|
"epoch": 16.862671425887658, |
|
"grad_norm": 1.094682216644287, |
|
"learning_rate": 7.2059760465720825e-06, |
|
"loss": 0.1473, |
|
"step": 11230 |
|
}, |
|
{ |
|
"epoch": 16.87770054480556, |
|
"grad_norm": 0.7629777789115906, |
|
"learning_rate": 7.1382559692007245e-06, |
|
"loss": 0.1385, |
|
"step": 11240 |
|
}, |
|
{ |
|
"epoch": 16.892729663723465, |
|
"grad_norm": 0.7562497854232788, |
|
"learning_rate": 7.070831135797473e-06, |
|
"loss": 0.1454, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 16.907758782641366, |
|
"grad_norm": 0.8945866823196411, |
|
"learning_rate": 7.003702010803892e-06, |
|
"loss": 0.1405, |
|
"step": 11260 |
|
}, |
|
{ |
|
"epoch": 16.92278790155927, |
|
"grad_norm": 0.7205698490142822, |
|
"learning_rate": 6.936869056624623e-06, |
|
"loss": 0.1475, |
|
"step": 11270 |
|
}, |
|
{ |
|
"epoch": 16.937817020477176, |
|
"grad_norm": 0.8356210589408875, |
|
"learning_rate": 6.870332733624174e-06, |
|
"loss": 0.1431, |
|
"step": 11280 |
|
}, |
|
{ |
|
"epoch": 16.952846139395078, |
|
"grad_norm": 0.8396646976470947, |
|
"learning_rate": 6.8040935001238256e-06, |
|
"loss": 0.1426, |
|
"step": 11290 |
|
}, |
|
{ |
|
"epoch": 16.967875258312983, |
|
"grad_norm": 0.9201752543449402, |
|
"learning_rate": 6.738151812398352e-06, |
|
"loss": 0.1434, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 16.982904377230884, |
|
"grad_norm": 0.9603893756866455, |
|
"learning_rate": 6.67250812467301e-06, |
|
"loss": 0.142, |
|
"step": 11310 |
|
}, |
|
{ |
|
"epoch": 16.99793349614879, |
|
"grad_norm": 0.7966869473457336, |
|
"learning_rate": 6.607162889120305e-06, |
|
"loss": 0.155, |
|
"step": 11320 |
|
}, |
|
{ |
|
"epoch": 17.012023295134323, |
|
"grad_norm": 0.5946935415267944, |
|
"learning_rate": 6.542116555856953e-06, |
|
"loss": 0.1274, |
|
"step": 11330 |
|
}, |
|
{ |
|
"epoch": 17.027052414052225, |
|
"grad_norm": 0.774712324142456, |
|
"learning_rate": 6.477369572940706e-06, |
|
"loss": 0.1221, |
|
"step": 11340 |
|
}, |
|
{ |
|
"epoch": 17.04208153297013, |
|
"grad_norm": 0.7754786610603333, |
|
"learning_rate": 6.412922386367332e-06, |
|
"loss": 0.1317, |
|
"step": 11350 |
|
}, |
|
{ |
|
"epoch": 17.05711065188803, |
|
"grad_norm": 0.6870192885398865, |
|
"learning_rate": 6.348775440067506e-06, |
|
"loss": 0.1174, |
|
"step": 11360 |
|
}, |
|
{ |
|
"epoch": 17.072139770805936, |
|
"grad_norm": 0.8024049401283264, |
|
"learning_rate": 6.284929175903786e-06, |
|
"loss": 0.127, |
|
"step": 11370 |
|
}, |
|
{ |
|
"epoch": 17.08716888972384, |
|
"grad_norm": 0.752888023853302, |
|
"learning_rate": 6.2213840336674936e-06, |
|
"loss": 0.1207, |
|
"step": 11380 |
|
}, |
|
{ |
|
"epoch": 17.102198008641743, |
|
"grad_norm": 0.7125491499900818, |
|
"learning_rate": 6.158140451075795e-06, |
|
"loss": 0.1351, |
|
"step": 11390 |
|
}, |
|
{ |
|
"epoch": 17.117227127559648, |
|
"grad_norm": 0.7468791007995605, |
|
"learning_rate": 6.095198863768564e-06, |
|
"loss": 0.131, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 17.13225624647755, |
|
"grad_norm": 0.8037786483764648, |
|
"learning_rate": 6.032559705305523e-06, |
|
"loss": 0.1308, |
|
"step": 11410 |
|
}, |
|
{ |
|
"epoch": 17.147285365395454, |
|
"grad_norm": 0.7919206023216248, |
|
"learning_rate": 5.9702234071631e-06, |
|
"loss": 0.1234, |
|
"step": 11420 |
|
}, |
|
{ |
|
"epoch": 17.162314484313356, |
|
"grad_norm": 0.7676987051963806, |
|
"learning_rate": 5.9081903987316e-06, |
|
"loss": 0.1197, |
|
"step": 11430 |
|
}, |
|
{ |
|
"epoch": 17.17734360323126, |
|
"grad_norm": 1.1687105894088745, |
|
"learning_rate": 5.8464611073121235e-06, |
|
"loss": 0.1241, |
|
"step": 11440 |
|
}, |
|
{ |
|
"epoch": 17.192372722149162, |
|
"grad_norm": 0.7436251044273376, |
|
"learning_rate": 5.785035958113716e-06, |
|
"loss": 0.1288, |
|
"step": 11450 |
|
}, |
|
{ |
|
"epoch": 17.207401841067067, |
|
"grad_norm": 0.656187117099762, |
|
"learning_rate": 5.7239153742503995e-06, |
|
"loss": 0.1187, |
|
"step": 11460 |
|
}, |
|
{ |
|
"epoch": 17.222430959984973, |
|
"grad_norm": 0.6904690265655518, |
|
"learning_rate": 5.663099776738273e-06, |
|
"loss": 0.1366, |
|
"step": 11470 |
|
}, |
|
{ |
|
"epoch": 17.237460078902874, |
|
"grad_norm": 0.8284912109375, |
|
"learning_rate": 5.602589584492562e-06, |
|
"loss": 0.1242, |
|
"step": 11480 |
|
}, |
|
{ |
|
"epoch": 17.25248919782078, |
|
"grad_norm": 0.8081623911857605, |
|
"learning_rate": 5.542385214324819e-06, |
|
"loss": 0.1234, |
|
"step": 11490 |
|
}, |
|
{ |
|
"epoch": 17.26751831673868, |
|
"grad_norm": 1.1938631534576416, |
|
"learning_rate": 5.48248708093998e-06, |
|
"loss": 0.1326, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 17.282547435656586, |
|
"grad_norm": 0.6938109993934631, |
|
"learning_rate": 5.422895596933558e-06, |
|
"loss": 0.1305, |
|
"step": 11510 |
|
}, |
|
{ |
|
"epoch": 17.297576554574487, |
|
"grad_norm": 0.7339420914649963, |
|
"learning_rate": 5.36361117278874e-06, |
|
"loss": 0.1206, |
|
"step": 11520 |
|
}, |
|
{ |
|
"epoch": 17.312605673492392, |
|
"grad_norm": 0.7437239289283752, |
|
"learning_rate": 5.304634216873633e-06, |
|
"loss": 0.1205, |
|
"step": 11530 |
|
}, |
|
{ |
|
"epoch": 17.327634792410294, |
|
"grad_norm": 0.7222012281417847, |
|
"learning_rate": 5.24596513543838e-06, |
|
"loss": 0.1219, |
|
"step": 11540 |
|
}, |
|
{ |
|
"epoch": 17.3426639113282, |
|
"grad_norm": 0.8264778852462769, |
|
"learning_rate": 5.187604332612445e-06, |
|
"loss": 0.1318, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 17.3576930302461, |
|
"grad_norm": 0.7213618159294128, |
|
"learning_rate": 5.129552210401728e-06, |
|
"loss": 0.1203, |
|
"step": 11560 |
|
}, |
|
{ |
|
"epoch": 17.372722149164005, |
|
"grad_norm": 0.7722398638725281, |
|
"learning_rate": 5.071809168685887e-06, |
|
"loss": 0.1266, |
|
"step": 11570 |
|
}, |
|
{ |
|
"epoch": 17.38775126808191, |
|
"grad_norm": 0.8326044678688049, |
|
"learning_rate": 5.014375605215521e-06, |
|
"loss": 0.1267, |
|
"step": 11580 |
|
}, |
|
{ |
|
"epoch": 17.40278038699981, |
|
"grad_norm": 0.886371374130249, |
|
"learning_rate": 4.957251915609462e-06, |
|
"loss": 0.119, |
|
"step": 11590 |
|
}, |
|
{ |
|
"epoch": 17.417809505917717, |
|
"grad_norm": 0.7517515420913696, |
|
"learning_rate": 4.900438493352055e-06, |
|
"loss": 0.1291, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 17.432838624835618, |
|
"grad_norm": 0.8436376452445984, |
|
"learning_rate": 4.843935729790422e-06, |
|
"loss": 0.1336, |
|
"step": 11610 |
|
}, |
|
{ |
|
"epoch": 17.447867743753523, |
|
"grad_norm": 0.8188118934631348, |
|
"learning_rate": 4.7877440141317675e-06, |
|
"loss": 0.1276, |
|
"step": 11620 |
|
}, |
|
{ |
|
"epoch": 17.462896862671425, |
|
"grad_norm": 0.7850053310394287, |
|
"learning_rate": 4.731863733440733e-06, |
|
"loss": 0.1263, |
|
"step": 11630 |
|
}, |
|
{ |
|
"epoch": 17.47792598158933, |
|
"grad_norm": 0.7156862616539001, |
|
"learning_rate": 4.676295272636688e-06, |
|
"loss": 0.1371, |
|
"step": 11640 |
|
}, |
|
{ |
|
"epoch": 17.49295510050723, |
|
"grad_norm": 0.9043847322463989, |
|
"learning_rate": 4.621039014491119e-06, |
|
"loss": 0.136, |
|
"step": 11650 |
|
}, |
|
{ |
|
"epoch": 17.507984219425136, |
|
"grad_norm": 0.7520122528076172, |
|
"learning_rate": 4.566095339624943e-06, |
|
"loss": 0.1278, |
|
"step": 11660 |
|
}, |
|
{ |
|
"epoch": 17.52301333834304, |
|
"grad_norm": 0.8322932124137878, |
|
"learning_rate": 4.511464626505935e-06, |
|
"loss": 0.1178, |
|
"step": 11670 |
|
}, |
|
{ |
|
"epoch": 17.538042457260943, |
|
"grad_norm": 0.7075957655906677, |
|
"learning_rate": 4.457147251446075e-06, |
|
"loss": 0.1295, |
|
"step": 11680 |
|
}, |
|
{ |
|
"epoch": 17.553071576178848, |
|
"grad_norm": 0.7323919534683228, |
|
"learning_rate": 4.403143588599029e-06, |
|
"loss": 0.1272, |
|
"step": 11690 |
|
}, |
|
{ |
|
"epoch": 17.56810069509675, |
|
"grad_norm": 0.9109891653060913, |
|
"learning_rate": 4.349454009957471e-06, |
|
"loss": 0.1236, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 17.583129814014654, |
|
"grad_norm": 0.8152607679367065, |
|
"learning_rate": 4.296078885350607e-06, |
|
"loss": 0.1267, |
|
"step": 11710 |
|
}, |
|
{ |
|
"epoch": 17.598158932932556, |
|
"grad_norm": 0.7224797606468201, |
|
"learning_rate": 4.2430185824415715e-06, |
|
"loss": 0.1355, |
|
"step": 11720 |
|
}, |
|
{ |
|
"epoch": 17.61318805185046, |
|
"grad_norm": 0.7984783053398132, |
|
"learning_rate": 4.190273466724925e-06, |
|
"loss": 0.1364, |
|
"step": 11730 |
|
}, |
|
{ |
|
"epoch": 17.628217170768362, |
|
"grad_norm": 0.9017600417137146, |
|
"learning_rate": 4.137843901524141e-06, |
|
"loss": 0.1281, |
|
"step": 11740 |
|
}, |
|
{ |
|
"epoch": 17.643246289686267, |
|
"grad_norm": 0.7681065797805786, |
|
"learning_rate": 4.085730247989078e-06, |
|
"loss": 0.1234, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 17.65827540860417, |
|
"grad_norm": 0.7442010045051575, |
|
"learning_rate": 4.033932865093499e-06, |
|
"loss": 0.1331, |
|
"step": 11760 |
|
}, |
|
{ |
|
"epoch": 17.673304527522074, |
|
"grad_norm": 0.7311212420463562, |
|
"learning_rate": 3.982452109632617e-06, |
|
"loss": 0.1336, |
|
"step": 11770 |
|
}, |
|
{ |
|
"epoch": 17.68833364643998, |
|
"grad_norm": 0.7073860764503479, |
|
"learning_rate": 3.931288336220617e-06, |
|
"loss": 0.1263, |
|
"step": 11780 |
|
}, |
|
{ |
|
"epoch": 17.70336276535788, |
|
"grad_norm": 0.6838569641113281, |
|
"learning_rate": 3.880441897288234e-06, |
|
"loss": 0.1299, |
|
"step": 11790 |
|
}, |
|
{ |
|
"epoch": 17.718391884275785, |
|
"grad_norm": 0.9706346988677979, |
|
"learning_rate": 3.829913143080283e-06, |
|
"loss": 0.1276, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 17.733421003193687, |
|
"grad_norm": 0.7603088617324829, |
|
"learning_rate": 3.7797024216533138e-06, |
|
"loss": 0.1263, |
|
"step": 11810 |
|
}, |
|
{ |
|
"epoch": 17.748450122111592, |
|
"grad_norm": 0.7066922187805176, |
|
"learning_rate": 3.729810078873125e-06, |
|
"loss": 0.1284, |
|
"step": 11820 |
|
}, |
|
{ |
|
"epoch": 17.763479241029493, |
|
"grad_norm": 0.7454369068145752, |
|
"learning_rate": 3.6802364584124947e-06, |
|
"loss": 0.124, |
|
"step": 11830 |
|
}, |
|
{ |
|
"epoch": 17.7785083599474, |
|
"grad_norm": 0.7552350759506226, |
|
"learning_rate": 3.6309819017487034e-06, |
|
"loss": 0.1259, |
|
"step": 11840 |
|
}, |
|
{ |
|
"epoch": 17.7935374788653, |
|
"grad_norm": 0.8061559200286865, |
|
"learning_rate": 3.5820467481612496e-06, |
|
"loss": 0.126, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 17.808566597783205, |
|
"grad_norm": 0.6990138292312622, |
|
"learning_rate": 3.5334313347294757e-06, |
|
"loss": 0.1271, |
|
"step": 11860 |
|
}, |
|
{ |
|
"epoch": 17.82359571670111, |
|
"grad_norm": 0.7601016163825989, |
|
"learning_rate": 3.4851359963302798e-06, |
|
"loss": 0.1397, |
|
"step": 11870 |
|
}, |
|
{ |
|
"epoch": 17.83862483561901, |
|
"grad_norm": 0.7683603167533875, |
|
"learning_rate": 3.43716106563578e-06, |
|
"loss": 0.1376, |
|
"step": 11880 |
|
}, |
|
{ |
|
"epoch": 17.853653954536917, |
|
"grad_norm": 0.8137221932411194, |
|
"learning_rate": 3.3895068731110534e-06, |
|
"loss": 0.122, |
|
"step": 11890 |
|
}, |
|
{ |
|
"epoch": 17.868683073454818, |
|
"grad_norm": 0.8366261124610901, |
|
"learning_rate": 3.342173747011801e-06, |
|
"loss": 0.1273, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 17.883712192372723, |
|
"grad_norm": 0.8289967179298401, |
|
"learning_rate": 3.295162013382164e-06, |
|
"loss": 0.1274, |
|
"step": 11910 |
|
}, |
|
{ |
|
"epoch": 17.898741311290625, |
|
"grad_norm": 0.6871482133865356, |
|
"learning_rate": 3.248471996052432e-06, |
|
"loss": 0.1357, |
|
"step": 11920 |
|
}, |
|
{ |
|
"epoch": 17.91377043020853, |
|
"grad_norm": 0.7140630483627319, |
|
"learning_rate": 3.202104016636814e-06, |
|
"loss": 0.1247, |
|
"step": 11930 |
|
}, |
|
{ |
|
"epoch": 17.92879954912643, |
|
"grad_norm": 0.7578158974647522, |
|
"learning_rate": 3.156058394531225e-06, |
|
"loss": 0.1285, |
|
"step": 11940 |
|
}, |
|
{ |
|
"epoch": 17.943828668044336, |
|
"grad_norm": 0.718285858631134, |
|
"learning_rate": 3.1103354469111056e-06, |
|
"loss": 0.1285, |
|
"step": 11950 |
|
}, |
|
{ |
|
"epoch": 17.958857786962238, |
|
"grad_norm": 0.7415304780006409, |
|
"learning_rate": 3.0649354887291925e-06, |
|
"loss": 0.1259, |
|
"step": 11960 |
|
}, |
|
{ |
|
"epoch": 17.973886905880143, |
|
"grad_norm": 0.7331326007843018, |
|
"learning_rate": 3.019858832713435e-06, |
|
"loss": 0.1264, |
|
"step": 11970 |
|
}, |
|
{ |
|
"epoch": 17.988916024798048, |
|
"grad_norm": 0.7621225714683533, |
|
"learning_rate": 2.9751057893647237e-06, |
|
"loss": 0.1306, |
|
"step": 11980 |
|
}, |
|
{ |
|
"epoch": 18.00300582378358, |
|
"grad_norm": 0.6445237994194031, |
|
"learning_rate": 2.930676666954846e-06, |
|
"loss": 0.1289, |
|
"step": 11990 |
|
}, |
|
{ |
|
"epoch": 18.018034942701483, |
|
"grad_norm": 0.6551523208618164, |
|
"learning_rate": 2.8865717715243212e-06, |
|
"loss": 0.123, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 18.03306406161939, |
|
"grad_norm": 0.6718552708625793, |
|
"learning_rate": 2.842791406880291e-06, |
|
"loss": 0.1254, |
|
"step": 12010 |
|
}, |
|
{ |
|
"epoch": 18.04809318053729, |
|
"grad_norm": 0.653846263885498, |
|
"learning_rate": 2.7993358745944608e-06, |
|
"loss": 0.1237, |
|
"step": 12020 |
|
}, |
|
{ |
|
"epoch": 18.063122299455195, |
|
"grad_norm": 0.7196510434150696, |
|
"learning_rate": 2.756205474000978e-06, |
|
"loss": 0.1162, |
|
"step": 12030 |
|
}, |
|
{ |
|
"epoch": 18.078151418373096, |
|
"grad_norm": 0.6618478894233704, |
|
"learning_rate": 2.7134005021943852e-06, |
|
"loss": 0.117, |
|
"step": 12040 |
|
}, |
|
{ |
|
"epoch": 18.093180537291, |
|
"grad_norm": 0.8368316292762756, |
|
"learning_rate": 2.670921254027592e-06, |
|
"loss": 0.1205, |
|
"step": 12050 |
|
}, |
|
{ |
|
"epoch": 18.108209656208906, |
|
"grad_norm": 0.6879215836524963, |
|
"learning_rate": 2.6287680221098233e-06, |
|
"loss": 0.1171, |
|
"step": 12060 |
|
}, |
|
{ |
|
"epoch": 18.123238775126808, |
|
"grad_norm": 0.7069093585014343, |
|
"learning_rate": 2.5869410968046294e-06, |
|
"loss": 0.1235, |
|
"step": 12070 |
|
}, |
|
{ |
|
"epoch": 18.138267894044713, |
|
"grad_norm": 0.6723190546035767, |
|
"learning_rate": 2.5454407662278244e-06, |
|
"loss": 0.1085, |
|
"step": 12080 |
|
}, |
|
{ |
|
"epoch": 18.153297012962614, |
|
"grad_norm": 0.6698660850524902, |
|
"learning_rate": 2.5042673162455954e-06, |
|
"loss": 0.1195, |
|
"step": 12090 |
|
}, |
|
{ |
|
"epoch": 18.16832613188052, |
|
"grad_norm": 0.6730449795722961, |
|
"learning_rate": 2.463421030472429e-06, |
|
"loss": 0.1139, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 18.18335525079842, |
|
"grad_norm": 0.805294394493103, |
|
"learning_rate": 2.422902190269266e-06, |
|
"loss": 0.1242, |
|
"step": 12110 |
|
}, |
|
{ |
|
"epoch": 18.198384369716326, |
|
"grad_norm": 1.0811830759048462, |
|
"learning_rate": 2.3827110747414785e-06, |
|
"loss": 0.1195, |
|
"step": 12120 |
|
}, |
|
{ |
|
"epoch": 18.213413488634227, |
|
"grad_norm": 0.6854028105735779, |
|
"learning_rate": 2.342847960736966e-06, |
|
"loss": 0.119, |
|
"step": 12130 |
|
}, |
|
{ |
|
"epoch": 18.228442607552132, |
|
"grad_norm": 0.6735851764678955, |
|
"learning_rate": 2.303313122844286e-06, |
|
"loss": 0.1321, |
|
"step": 12140 |
|
}, |
|
{ |
|
"epoch": 18.243471726470037, |
|
"grad_norm": 0.7301083207130432, |
|
"learning_rate": 2.264106833390722e-06, |
|
"loss": 0.1204, |
|
"step": 12150 |
|
}, |
|
{ |
|
"epoch": 18.25850084538794, |
|
"grad_norm": 0.7372903823852539, |
|
"learning_rate": 2.2252293624404176e-06, |
|
"loss": 0.1201, |
|
"step": 12160 |
|
}, |
|
{ |
|
"epoch": 18.273529964305844, |
|
"grad_norm": 0.6305893659591675, |
|
"learning_rate": 2.1866809777925324e-06, |
|
"loss": 0.1128, |
|
"step": 12170 |
|
}, |
|
{ |
|
"epoch": 18.288559083223745, |
|
"grad_norm": 0.7112670540809631, |
|
"learning_rate": 2.148461944979385e-06, |
|
"loss": 0.1172, |
|
"step": 12180 |
|
}, |
|
{ |
|
"epoch": 18.30358820214165, |
|
"grad_norm": 0.6915646195411682, |
|
"learning_rate": 2.1105725272646094e-06, |
|
"loss": 0.1197, |
|
"step": 12190 |
|
}, |
|
{ |
|
"epoch": 18.318617321059552, |
|
"grad_norm": 0.6650305986404419, |
|
"learning_rate": 2.0730129856413707e-06, |
|
"loss": 0.121, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 18.333646439977457, |
|
"grad_norm": 0.6500080823898315, |
|
"learning_rate": 2.0357835788305467e-06, |
|
"loss": 0.1209, |
|
"step": 12210 |
|
}, |
|
{ |
|
"epoch": 18.34867555889536, |
|
"grad_norm": 0.7032843828201294, |
|
"learning_rate": 1.998884563278963e-06, |
|
"loss": 0.1194, |
|
"step": 12220 |
|
}, |
|
{ |
|
"epoch": 18.363704677813264, |
|
"grad_norm": 0.6876169443130493, |
|
"learning_rate": 1.962316193157593e-06, |
|
"loss": 0.117, |
|
"step": 12230 |
|
}, |
|
{ |
|
"epoch": 18.378733796731165, |
|
"grad_norm": 0.6640487909317017, |
|
"learning_rate": 1.926078720359853e-06, |
|
"loss": 0.1246, |
|
"step": 12240 |
|
}, |
|
{ |
|
"epoch": 18.39376291564907, |
|
"grad_norm": 0.7534406185150146, |
|
"learning_rate": 1.8901723944998118e-06, |
|
"loss": 0.1175, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 18.408792034566975, |
|
"grad_norm": 0.7041878700256348, |
|
"learning_rate": 1.8545974629105622e-06, |
|
"loss": 0.1191, |
|
"step": 12260 |
|
}, |
|
{ |
|
"epoch": 18.423821153484877, |
|
"grad_norm": 0.6589450240135193, |
|
"learning_rate": 1.81935417064239e-06, |
|
"loss": 0.1155, |
|
"step": 12270 |
|
}, |
|
{ |
|
"epoch": 18.43885027240278, |
|
"grad_norm": 0.6730456352233887, |
|
"learning_rate": 1.7844427604612024e-06, |
|
"loss": 0.1283, |
|
"step": 12280 |
|
}, |
|
{ |
|
"epoch": 18.453879391320683, |
|
"grad_norm": 0.7545807361602783, |
|
"learning_rate": 1.74986347284678e-06, |
|
"loss": 0.114, |
|
"step": 12290 |
|
}, |
|
{ |
|
"epoch": 18.468908510238588, |
|
"grad_norm": 0.720689058303833, |
|
"learning_rate": 1.7156165459911665e-06, |
|
"loss": 0.1228, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 18.48393762915649, |
|
"grad_norm": 0.6629992723464966, |
|
"learning_rate": 1.6817022157970042e-06, |
|
"loss": 0.1171, |
|
"step": 12310 |
|
}, |
|
{ |
|
"epoch": 18.498966748074395, |
|
"grad_norm": 0.6659217476844788, |
|
"learning_rate": 1.648120715875906e-06, |
|
"loss": 0.1133, |
|
"step": 12320 |
|
}, |
|
{ |
|
"epoch": 18.513995866992296, |
|
"grad_norm": 0.6609564423561096, |
|
"learning_rate": 1.6148722775468639e-06, |
|
"loss": 0.1343, |
|
"step": 12330 |
|
}, |
|
{ |
|
"epoch": 18.5290249859102, |
|
"grad_norm": 0.6903553009033203, |
|
"learning_rate": 1.581957129834638e-06, |
|
"loss": 0.1182, |
|
"step": 12340 |
|
}, |
|
{ |
|
"epoch": 18.544054104828106, |
|
"grad_norm": 0.7767003178596497, |
|
"learning_rate": 1.5493754994681976e-06, |
|
"loss": 0.122, |
|
"step": 12350 |
|
}, |
|
{ |
|
"epoch": 18.559083223746008, |
|
"grad_norm": 0.6776891350746155, |
|
"learning_rate": 1.5171276108791544e-06, |
|
"loss": 0.1129, |
|
"step": 12360 |
|
}, |
|
{ |
|
"epoch": 18.574112342663913, |
|
"grad_norm": 0.6937426924705505, |
|
"learning_rate": 1.4852136862001764e-06, |
|
"loss": 0.1136, |
|
"step": 12370 |
|
}, |
|
{ |
|
"epoch": 18.589141461581814, |
|
"grad_norm": 0.7074488401412964, |
|
"learning_rate": 1.4536339452635384e-06, |
|
"loss": 0.1126, |
|
"step": 12380 |
|
}, |
|
{ |
|
"epoch": 18.60417058049972, |
|
"grad_norm": 0.6760552525520325, |
|
"learning_rate": 1.4223886055995172e-06, |
|
"loss": 0.1227, |
|
"step": 12390 |
|
}, |
|
{ |
|
"epoch": 18.61919969941762, |
|
"grad_norm": 0.7237436175346375, |
|
"learning_rate": 1.3914778824349884e-06, |
|
"loss": 0.1208, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 18.634228818335526, |
|
"grad_norm": 0.6534668803215027, |
|
"learning_rate": 1.3609019886918427e-06, |
|
"loss": 0.1171, |
|
"step": 12410 |
|
}, |
|
{ |
|
"epoch": 18.649257937253427, |
|
"grad_norm": 0.6551641225814819, |
|
"learning_rate": 1.3306611349856112e-06, |
|
"loss": 0.1184, |
|
"step": 12420 |
|
}, |
|
{ |
|
"epoch": 18.664287056171332, |
|
"grad_norm": 0.681528627872467, |
|
"learning_rate": 1.300755529623937e-06, |
|
"loss": 0.1203, |
|
"step": 12430 |
|
}, |
|
{ |
|
"epoch": 18.679316175089234, |
|
"grad_norm": 0.7110047340393066, |
|
"learning_rate": 1.2711853786052109e-06, |
|
"loss": 0.1227, |
|
"step": 12440 |
|
}, |
|
{ |
|
"epoch": 18.69434529400714, |
|
"grad_norm": 0.7127984166145325, |
|
"learning_rate": 1.241950885617088e-06, |
|
"loss": 0.1192, |
|
"step": 12450 |
|
}, |
|
{ |
|
"epoch": 18.709374412925044, |
|
"grad_norm": 0.9400015473365784, |
|
"learning_rate": 1.2130522520351405e-06, |
|
"loss": 0.1206, |
|
"step": 12460 |
|
}, |
|
{ |
|
"epoch": 18.724403531842945, |
|
"grad_norm": 0.640738844871521, |
|
"learning_rate": 1.1844896769214186e-06, |
|
"loss": 0.125, |
|
"step": 12470 |
|
}, |
|
{ |
|
"epoch": 18.73943265076085, |
|
"grad_norm": 0.6960272789001465, |
|
"learning_rate": 1.1562633570231352e-06, |
|
"loss": 0.1181, |
|
"step": 12480 |
|
}, |
|
{ |
|
"epoch": 18.754461769678752, |
|
"grad_norm": 0.7713277339935303, |
|
"learning_rate": 1.128373486771256e-06, |
|
"loss": 0.1183, |
|
"step": 12490 |
|
}, |
|
{ |
|
"epoch": 18.769490888596657, |
|
"grad_norm": 0.6949428915977478, |
|
"learning_rate": 1.1008202582792004e-06, |
|
"loss": 0.1308, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 18.78452000751456, |
|
"grad_norm": 0.6489851474761963, |
|
"learning_rate": 1.0736038613414878e-06, |
|
"loss": 0.1288, |
|
"step": 12510 |
|
}, |
|
{ |
|
"epoch": 18.799549126432463, |
|
"grad_norm": 0.7511118054389954, |
|
"learning_rate": 1.0467244834324707e-06, |
|
"loss": 0.1098, |
|
"step": 12520 |
|
}, |
|
{ |
|
"epoch": 18.814578245350365, |
|
"grad_norm": 0.7278922200202942, |
|
"learning_rate": 1.0201823097049812e-06, |
|
"loss": 0.1248, |
|
"step": 12530 |
|
}, |
|
{ |
|
"epoch": 18.82960736426827, |
|
"grad_norm": 0.7048822641372681, |
|
"learning_rate": 9.939775229891313e-07, |
|
"loss": 0.1201, |
|
"step": 12540 |
|
}, |
|
{ |
|
"epoch": 18.84463648318617, |
|
"grad_norm": 0.7828486561775208, |
|
"learning_rate": 9.681103037909866e-07, |
|
"loss": 0.1271, |
|
"step": 12550 |
|
}, |
|
{ |
|
"epoch": 18.859665602104076, |
|
"grad_norm": 0.6916821002960205, |
|
"learning_rate": 9.42580830291373e-07, |
|
"loss": 0.1151, |
|
"step": 12560 |
|
}, |
|
{ |
|
"epoch": 18.87469472102198, |
|
"grad_norm": 0.7299247980117798, |
|
"learning_rate": 9.173892783445992e-07, |
|
"loss": 0.1287, |
|
"step": 12570 |
|
}, |
|
{ |
|
"epoch": 18.889723839939883, |
|
"grad_norm": 0.8514544367790222, |
|
"learning_rate": 8.925358214772972e-07, |
|
"loss": 0.1261, |
|
"step": 12580 |
|
}, |
|
{ |
|
"epoch": 18.904752958857788, |
|
"grad_norm": 0.6913233995437622, |
|
"learning_rate": 8.680206308871952e-07, |
|
"loss": 0.1091, |
|
"step": 12590 |
|
}, |
|
{ |
|
"epoch": 18.91978207777569, |
|
"grad_norm": 0.7069427967071533, |
|
"learning_rate": 8.43843875441952e-07, |
|
"loss": 0.1242, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 18.934811196693595, |
|
"grad_norm": 0.6860793232917786, |
|
"learning_rate": 8.2000572167798e-07, |
|
"loss": 0.1245, |
|
"step": 12610 |
|
}, |
|
{ |
|
"epoch": 18.949840315611496, |
|
"grad_norm": 0.6952442526817322, |
|
"learning_rate": 7.965063337993017e-07, |
|
"loss": 0.1194, |
|
"step": 12620 |
|
}, |
|
{ |
|
"epoch": 18.9648694345294, |
|
"grad_norm": 0.7195196747779846, |
|
"learning_rate": 7.733458736764398e-07, |
|
"loss": 0.1266, |
|
"step": 12630 |
|
}, |
|
{ |
|
"epoch": 18.979898553447303, |
|
"grad_norm": 0.685310959815979, |
|
"learning_rate": 7.505245008452788e-07, |
|
"loss": 0.1153, |
|
"step": 12640 |
|
}, |
|
{ |
|
"epoch": 18.994927672365208, |
|
"grad_norm": 0.6967130899429321, |
|
"learning_rate": 7.280423725059604e-07, |
|
"loss": 0.1331, |
|
"step": 12650 |
|
}, |
|
{ |
|
"epoch": 19.00901747135074, |
|
"grad_norm": 0.5955845713615417, |
|
"learning_rate": 7.058996435218346e-07, |
|
"loss": 0.1032, |
|
"step": 12660 |
|
}, |
|
{ |
|
"epoch": 19.024046590268647, |
|
"grad_norm": 0.6826702356338501, |
|
"learning_rate": 6.840964664183436e-07, |
|
"loss": 0.1116, |
|
"step": 12670 |
|
}, |
|
{ |
|
"epoch": 19.039075709186548, |
|
"grad_norm": 0.6504730582237244, |
|
"learning_rate": 6.626329913820339e-07, |
|
"loss": 0.1218, |
|
"step": 12680 |
|
}, |
|
{ |
|
"epoch": 19.054104828104453, |
|
"grad_norm": 0.6690040230751038, |
|
"learning_rate": 6.415093662594629e-07, |
|
"loss": 0.1218, |
|
"step": 12690 |
|
}, |
|
{ |
|
"epoch": 19.069133947022355, |
|
"grad_norm": 0.7162594199180603, |
|
"learning_rate": 6.207257365562047e-07, |
|
"loss": 0.1148, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 19.08416306594026, |
|
"grad_norm": 0.6570801734924316, |
|
"learning_rate": 6.00282245435857e-07, |
|
"loss": 0.1138, |
|
"step": 12710 |
|
}, |
|
{ |
|
"epoch": 19.09919218485816, |
|
"grad_norm": 0.6705721616744995, |
|
"learning_rate": 5.80179033719036e-07, |
|
"loss": 0.1241, |
|
"step": 12720 |
|
}, |
|
{ |
|
"epoch": 19.114221303776066, |
|
"grad_norm": 0.7230423092842102, |
|
"learning_rate": 5.604162398824275e-07, |
|
"loss": 0.1122, |
|
"step": 12730 |
|
}, |
|
{ |
|
"epoch": 19.12925042269397, |
|
"grad_norm": 0.6463306546211243, |
|
"learning_rate": 5.409940000578206e-07, |
|
"loss": 0.1085, |
|
"step": 12740 |
|
}, |
|
{ |
|
"epoch": 19.144279541611873, |
|
"grad_norm": 0.7528629302978516, |
|
"learning_rate": 5.219124480311532e-07, |
|
"loss": 0.1186, |
|
"step": 12750 |
|
}, |
|
{ |
|
"epoch": 19.159308660529778, |
|
"grad_norm": 1.4888911247253418, |
|
"learning_rate": 5.031717152416238e-07, |
|
"loss": 0.1158, |
|
"step": 12760 |
|
}, |
|
{ |
|
"epoch": 19.17433777944768, |
|
"grad_norm": 0.6441943645477295, |
|
"learning_rate": 4.847719307807752e-07, |
|
"loss": 0.1197, |
|
"step": 12770 |
|
}, |
|
{ |
|
"epoch": 19.189366898365584, |
|
"grad_norm": 0.6627583503723145, |
|
"learning_rate": 4.6671322139158477e-07, |
|
"loss": 0.1168, |
|
"step": 12780 |
|
}, |
|
{ |
|
"epoch": 19.204396017283486, |
|
"grad_norm": 0.6732495427131653, |
|
"learning_rate": 4.4899571146761467e-07, |
|
"loss": 0.1104, |
|
"step": 12790 |
|
}, |
|
{ |
|
"epoch": 19.21942513620139, |
|
"grad_norm": 0.6743932366371155, |
|
"learning_rate": 4.3161952305215136e-07, |
|
"loss": 0.1185, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 19.234454255119292, |
|
"grad_norm": 0.7038917541503906, |
|
"learning_rate": 4.145847758373511e-07, |
|
"loss": 0.1216, |
|
"step": 12810 |
|
}, |
|
{ |
|
"epoch": 19.249483374037197, |
|
"grad_norm": 0.6505002975463867, |
|
"learning_rate": 3.9789158716343475e-07, |
|
"loss": 0.1247, |
|
"step": 12820 |
|
}, |
|
{ |
|
"epoch": 19.2645124929551, |
|
"grad_norm": 0.6234051585197449, |
|
"learning_rate": 3.815400720178719e-07, |
|
"loss": 0.1122, |
|
"step": 12830 |
|
}, |
|
{ |
|
"epoch": 19.279541611873004, |
|
"grad_norm": 0.6669496297836304, |
|
"learning_rate": 3.6553034303457577e-07, |
|
"loss": 0.1127, |
|
"step": 12840 |
|
}, |
|
{ |
|
"epoch": 19.29457073079091, |
|
"grad_norm": 0.7005789279937744, |
|
"learning_rate": 3.49862510493143e-07, |
|
"loss": 0.1135, |
|
"step": 12850 |
|
}, |
|
{ |
|
"epoch": 19.30959984970881, |
|
"grad_norm": 0.7209417223930359, |
|
"learning_rate": 3.3453668231809286e-07, |
|
"loss": 0.115, |
|
"step": 12860 |
|
}, |
|
{ |
|
"epoch": 19.324628968626715, |
|
"grad_norm": 0.670708179473877, |
|
"learning_rate": 3.1955296407811807e-07, |
|
"loss": 0.1147, |
|
"step": 12870 |
|
}, |
|
{ |
|
"epoch": 19.339658087544617, |
|
"grad_norm": 0.6531425714492798, |
|
"learning_rate": 3.0491145898536856e-07, |
|
"loss": 0.1153, |
|
"step": 12880 |
|
}, |
|
{ |
|
"epoch": 19.354687206462522, |
|
"grad_norm": 0.6748098134994507, |
|
"learning_rate": 2.9061226789471873e-07, |
|
"loss": 0.1098, |
|
"step": 12890 |
|
}, |
|
{ |
|
"epoch": 19.369716325380423, |
|
"grad_norm": 0.7407058477401733, |
|
"learning_rate": 2.7665548930308484e-07, |
|
"loss": 0.1186, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 19.38474544429833, |
|
"grad_norm": 0.7474448680877686, |
|
"learning_rate": 2.6304121934876966e-07, |
|
"loss": 0.1167, |
|
"step": 12910 |
|
}, |
|
{ |
|
"epoch": 19.39977456321623, |
|
"grad_norm": 0.710455596446991, |
|
"learning_rate": 2.497695518107579e-07, |
|
"loss": 0.1256, |
|
"step": 12920 |
|
}, |
|
{ |
|
"epoch": 19.414803682134135, |
|
"grad_norm": 0.674196183681488, |
|
"learning_rate": 2.3684057810808847e-07, |
|
"loss": 0.1199, |
|
"step": 12930 |
|
}, |
|
{ |
|
"epoch": 19.42983280105204, |
|
"grad_norm": 0.6443490982055664, |
|
"learning_rate": 2.2425438729924419e-07, |
|
"loss": 0.1134, |
|
"step": 12940 |
|
}, |
|
{ |
|
"epoch": 19.44486191996994, |
|
"grad_norm": 0.6689858436584473, |
|
"learning_rate": 2.120110660815078e-07, |
|
"loss": 0.1213, |
|
"step": 12950 |
|
}, |
|
{ |
|
"epoch": 19.459891038887847, |
|
"grad_norm": 0.6597970128059387, |
|
"learning_rate": 2.0011069879038447e-07, |
|
"loss": 0.127, |
|
"step": 12960 |
|
}, |
|
{ |
|
"epoch": 19.474920157805748, |
|
"grad_norm": 0.6606748104095459, |
|
"learning_rate": 1.8855336739901363e-07, |
|
"loss": 0.1184, |
|
"step": 12970 |
|
}, |
|
{ |
|
"epoch": 19.489949276723653, |
|
"grad_norm": 0.6770042181015015, |
|
"learning_rate": 1.773391515176026e-07, |
|
"loss": 0.1199, |
|
"step": 12980 |
|
}, |
|
{ |
|
"epoch": 19.504978395641555, |
|
"grad_norm": 0.6483029723167419, |
|
"learning_rate": 1.6646812839287706e-07, |
|
"loss": 0.1094, |
|
"step": 12990 |
|
}, |
|
{ |
|
"epoch": 19.52000751455946, |
|
"grad_norm": 0.6776772737503052, |
|
"learning_rate": 1.5594037290755925e-07, |
|
"loss": 0.115, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 19.53503663347736, |
|
"grad_norm": 0.6734815835952759, |
|
"learning_rate": 1.4575595757985173e-07, |
|
"loss": 0.1176, |
|
"step": 13010 |
|
}, |
|
{ |
|
"epoch": 19.550065752395266, |
|
"grad_norm": 0.671363353729248, |
|
"learning_rate": 1.3591495256291554e-07, |
|
"loss": 0.1158, |
|
"step": 13020 |
|
}, |
|
{ |
|
"epoch": 19.565094871313168, |
|
"grad_norm": 0.7096564769744873, |
|
"learning_rate": 1.2641742564441506e-07, |
|
"loss": 0.1178, |
|
"step": 13030 |
|
}, |
|
{ |
|
"epoch": 19.580123990231073, |
|
"grad_norm": 0.7112547755241394, |
|
"learning_rate": 1.1726344224603502e-07, |
|
"loss": 0.1186, |
|
"step": 13040 |
|
}, |
|
{ |
|
"epoch": 19.595153109148978, |
|
"grad_norm": 0.9371479153633118, |
|
"learning_rate": 1.0845306542303645e-07, |
|
"loss": 0.1158, |
|
"step": 13050 |
|
}, |
|
{ |
|
"epoch": 19.61018222806688, |
|
"grad_norm": 0.666856050491333, |
|
"learning_rate": 9.998635586381255e-08, |
|
"loss": 0.1151, |
|
"step": 13060 |
|
}, |
|
{ |
|
"epoch": 19.625211346984784, |
|
"grad_norm": 0.6255350708961487, |
|
"learning_rate": 9.186337188949457e-08, |
|
"loss": 0.1287, |
|
"step": 13070 |
|
}, |
|
{ |
|
"epoch": 19.640240465902686, |
|
"grad_norm": 0.6888746619224548, |
|
"learning_rate": 8.408416945351328e-08, |
|
"loss": 0.119, |
|
"step": 13080 |
|
}, |
|
{ |
|
"epoch": 19.65526958482059, |
|
"grad_norm": 0.6902468204498291, |
|
"learning_rate": 7.664880214123815e-08, |
|
"loss": 0.1199, |
|
"step": 13090 |
|
}, |
|
{ |
|
"epoch": 19.670298703738492, |
|
"grad_norm": 0.6694928407669067, |
|
"learning_rate": 6.95573211696221e-08, |
|
"loss": 0.1262, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 19.685327822656397, |
|
"grad_norm": 0.6304376125335693, |
|
"learning_rate": 6.280977538681288e-08, |
|
"loss": 0.1196, |
|
"step": 13110 |
|
}, |
|
{ |
|
"epoch": 19.7003569415743, |
|
"grad_norm": 0.7109536528587341, |
|
"learning_rate": 5.64062112718311e-08, |
|
"loss": 0.1158, |
|
"step": 13120 |
|
}, |
|
{ |
|
"epoch": 19.715386060492204, |
|
"grad_norm": 0.6978461146354675, |
|
"learning_rate": 5.0346672934270534e-08, |
|
"loss": 0.1139, |
|
"step": 13130 |
|
}, |
|
{ |
|
"epoch": 19.73041517941011, |
|
"grad_norm": 0.6379060745239258, |
|
"learning_rate": 4.4631202113953886e-08, |
|
"loss": 0.1157, |
|
"step": 13140 |
|
}, |
|
{ |
|
"epoch": 19.74544429832801, |
|
"grad_norm": 0.6268938779830933, |
|
"learning_rate": 3.925983818069412e-08, |
|
"loss": 0.1086, |
|
"step": 13150 |
|
}, |
|
{ |
|
"epoch": 19.760473417245915, |
|
"grad_norm": 0.7297201156616211, |
|
"learning_rate": 3.4232618133978044e-08, |
|
"loss": 0.1132, |
|
"step": 13160 |
|
}, |
|
{ |
|
"epoch": 19.775502536163817, |
|
"grad_norm": 0.6648380756378174, |
|
"learning_rate": 2.9549576602733164e-08, |
|
"loss": 0.1124, |
|
"step": 13170 |
|
}, |
|
{ |
|
"epoch": 19.790531655081722, |
|
"grad_norm": 0.7137235999107361, |
|
"learning_rate": 2.5210745845100082e-08, |
|
"loss": 0.1165, |
|
"step": 13180 |
|
}, |
|
{ |
|
"epoch": 19.805560773999623, |
|
"grad_norm": 0.6801294684410095, |
|
"learning_rate": 2.1216155748182696e-08, |
|
"loss": 0.1155, |
|
"step": 13190 |
|
}, |
|
{ |
|
"epoch": 19.82058989291753, |
|
"grad_norm": 0.719840407371521, |
|
"learning_rate": 1.756583382785948e-08, |
|
"loss": 0.1261, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 19.83561901183543, |
|
"grad_norm": 0.6777321696281433, |
|
"learning_rate": 1.4259805228594713e-08, |
|
"loss": 0.1172, |
|
"step": 13210 |
|
}, |
|
{ |
|
"epoch": 19.850648130753335, |
|
"grad_norm": 0.6588504314422607, |
|
"learning_rate": 1.129809272326643e-08, |
|
"loss": 0.1151, |
|
"step": 13220 |
|
}, |
|
{ |
|
"epoch": 19.865677249671236, |
|
"grad_norm": 0.6828821897506714, |
|
"learning_rate": 8.680716712988756e-09, |
|
"loss": 0.1176, |
|
"step": 13230 |
|
}, |
|
{ |
|
"epoch": 19.88070636858914, |
|
"grad_norm": 0.7881568670272827, |
|
"learning_rate": 6.40769522700091e-09, |
|
"loss": 0.1212, |
|
"step": 13240 |
|
}, |
|
{ |
|
"epoch": 19.895735487507046, |
|
"grad_norm": 0.6444976329803467, |
|
"learning_rate": 4.479043922528403e-09, |
|
"loss": 0.1141, |
|
"step": 13250 |
|
}, |
|
{ |
|
"epoch": 19.910764606424948, |
|
"grad_norm": 0.6598045825958252, |
|
"learning_rate": 2.894776084672035e-09, |
|
"loss": 0.1181, |
|
"step": 13260 |
|
}, |
|
{ |
|
"epoch": 19.925793725342853, |
|
"grad_norm": 0.6139656901359558, |
|
"learning_rate": 1.654902626324617e-09, |
|
"loss": 0.1222, |
|
"step": 13270 |
|
}, |
|
{ |
|
"epoch": 19.940822844260754, |
|
"grad_norm": 0.6389946341514587, |
|
"learning_rate": 7.594320880821571e-10, |
|
"loss": 0.1218, |
|
"step": 13280 |
|
}, |
|
{ |
|
"epoch": 19.95585196317866, |
|
"grad_norm": 0.6922657489776611, |
|
"learning_rate": 2.0837063821055326e-10, |
|
"loss": 0.1139, |
|
"step": 13290 |
|
}, |
|
{ |
|
"epoch": 19.97088108209656, |
|
"grad_norm": 0.6712486743927002, |
|
"learning_rate": 1.7220725789801607e-12, |
|
"loss": 0.1172, |
|
"step": 13300 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 13300, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.732273085924172e+20, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|