{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 49.858356940509914, |
|
"global_step": 4400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.9999745104274995e-05, |
|
"loss": 1.1665, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.9998980430094333e-05, |
|
"loss": 1.1142, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.9997706016440462e-05, |
|
"loss": 1.0657, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.9995921928281893e-05, |
|
"loss": 1.0229, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1.999362825656992e-05, |
|
"loss": 1.0155, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.9990825118233958e-05, |
|
"loss": 0.9919, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.9987512656175612e-05, |
|
"loss": 0.9623, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.9983691039261358e-05, |
|
"loss": 0.937, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 1.9979360462313965e-05, |
|
"loss": 0.9527, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 1.9974521146102535e-05, |
|
"loss": 0.9173, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 1.9969173337331283e-05, |
|
"loss": 0.91, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 1.9963317308626916e-05, |
|
"loss": 0.9045, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 1.9956953358524774e-05, |
|
"loss": 0.94, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 1.9950081811453598e-05, |
|
"loss": 0.9086, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 1.9942703017718977e-05, |
|
"loss": 0.8946, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 1.99348173534855e-05, |
|
"loss": 0.8836, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.9926425220757607e-05, |
|
"loss": 0.9001, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.991752704735903e-05, |
|
"loss": 0.8826, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.990812328691105e-05, |
|
"loss": 0.8549, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.989821441880933e-05, |
|
"loss": 0.869, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1.9887800948199496e-05, |
|
"loss": 0.8772, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 1.9876883405951378e-05, |
|
"loss": 0.8691, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 1.9865462348631945e-05, |
|
"loss": 0.8978, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 1.9853538358476933e-05, |
|
"loss": 0.8576, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 1.984111204336116e-05, |
|
"loss": 0.8506, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 1.9828184036767556e-05, |
|
"loss": 0.8679, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 1.981475499775484e-05, |
|
"loss": 0.888, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 1.9800825610923937e-05, |
|
"loss": 0.8369, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 1.9786396586383078e-05, |
|
"loss": 0.8244, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 1.9771468659711595e-05, |
|
"loss": 0.8495, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 1.9756042591922436e-05, |
|
"loss": 0.85, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 1.9740119169423337e-05, |
|
"loss": 0.8307, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 1.9723699203976768e-05, |
|
"loss": 0.8419, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 1.9706783532658528e-05, |
|
"loss": 0.8483, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 1.9689373017815076e-05, |
|
"loss": 0.8181, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 1.9671468547019575e-05, |
|
"loss": 0.8203, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 1.9653071033026635e-05, |
|
"loss": 0.8132, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 1.963418141372579e-05, |
|
"loss": 0.8299, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"learning_rate": 1.9614800652093685e-05, |
|
"loss": 0.8228, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"learning_rate": 1.9594929736144978e-05, |
|
"loss": 0.8083, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 1.9574569678881965e-05, |
|
"loss": 0.8054, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"learning_rate": 1.955372151824297e-05, |
|
"loss": 0.8127, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"learning_rate": 1.9532386317049387e-05, |
|
"loss": 0.8047, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 1.9510565162951538e-05, |
|
"loss": 0.8113, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 1.9488259168373198e-05, |
|
"loss": 0.7806, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 5.21, |
|
"learning_rate": 1.94654694704549e-05, |
|
"loss": 0.7973, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"learning_rate": 1.944219723099596e-05, |
|
"loss": 0.7968, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 5.44, |
|
"learning_rate": 1.941844363639525e-05, |
|
"loss": 0.7838, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 5.55, |
|
"learning_rate": 1.9394209897590707e-05, |
|
"loss": 0.7798, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 5.67, |
|
"learning_rate": 1.936949724999762e-05, |
|
"loss": 0.7791, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 5.78, |
|
"learning_rate": 1.9344306953445632e-05, |
|
"loss": 0.7632, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 5.89, |
|
"learning_rate": 1.9318640292114526e-05, |
|
"loss": 0.7984, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 1.929249857446875e-05, |
|
"loss": 0.7763, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"learning_rate": 1.9265883133190715e-05, |
|
"loss": 0.7489, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 6.23, |
|
"learning_rate": 1.9238795325112867e-05, |
|
"loss": 0.7739, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 6.35, |
|
"learning_rate": 1.92112365311485e-05, |
|
"loss": 0.7664, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 6.46, |
|
"learning_rate": 1.918320815622137e-05, |
|
"loss": 0.76, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 6.57, |
|
"learning_rate": 1.9154711629194062e-05, |
|
"loss": 0.7604, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 6.69, |
|
"learning_rate": 1.912574840279516e-05, |
|
"loss": 0.7598, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 1.9096319953545186e-05, |
|
"loss": 0.7585, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 6.91, |
|
"learning_rate": 1.9066427781681314e-05, |
|
"loss": 0.7312, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"learning_rate": 1.9036073411080917e-05, |
|
"loss": 0.732, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"learning_rate": 1.900525838918385e-05, |
|
"loss": 0.7392, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"learning_rate": 1.8973984286913584e-05, |
|
"loss": 0.6986, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 7.37, |
|
"learning_rate": 1.8942252698597113e-05, |
|
"loss": 0.7281, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 7.48, |
|
"learning_rate": 1.891006524188368e-05, |
|
"loss": 0.7316, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 7.59, |
|
"learning_rate": 1.8877423557662307e-05, |
|
"loss": 0.7329, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 7.71, |
|
"learning_rate": 1.8844329309978146e-05, |
|
"loss": 0.7409, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 7.82, |
|
"learning_rate": 1.8810784185947648e-05, |
|
"loss": 0.7345, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 7.93, |
|
"learning_rate": 1.8776789895672557e-05, |
|
"loss": 0.7418, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"learning_rate": 1.8742348172152728e-05, |
|
"loss": 0.7426, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 8.16, |
|
"learning_rate": 1.8707460771197773e-05, |
|
"loss": 0.6952, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 8.27, |
|
"learning_rate": 1.8672129471337568e-05, |
|
"loss": 0.7136, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 8.39, |
|
"learning_rate": 1.863635607373157e-05, |
|
"loss": 0.7284, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 8.5, |
|
"learning_rate": 1.8600142402077006e-05, |
|
"loss": 0.6978, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 8.61, |
|
"learning_rate": 1.856349030251589e-05, |
|
"loss": 0.6843, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 8.73, |
|
"learning_rate": 1.8526401643540924e-05, |
|
"loss": 0.6895, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 8.84, |
|
"learning_rate": 1.8488878315900228e-05, |
|
"loss": 0.7084, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 8.95, |
|
"learning_rate": 1.8450922232500966e-05, |
|
"loss": 0.7028, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"learning_rate": 1.8412535328311813e-05, |
|
"loss": 0.6905, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 9.18, |
|
"learning_rate": 1.837371956026433e-05, |
|
"loss": 0.6851, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 9.29, |
|
"learning_rate": 1.8334476907153177e-05, |
|
"loss": 0.6759, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 9.41, |
|
"learning_rate": 1.8294809369535265e-05, |
|
"loss": 0.6613, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 9.52, |
|
"learning_rate": 1.825471896962774e-05, |
|
"loss": 0.6814, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 9.63, |
|
"learning_rate": 1.8214207751204917e-05, |
|
"loss": 0.7044, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 9.75, |
|
"learning_rate": 1.817327777949407e-05, |
|
"loss": 0.6753, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 9.86, |
|
"learning_rate": 1.8131931141070166e-05, |
|
"loss": 0.6748, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 9.97, |
|
"learning_rate": 1.8090169943749477e-05, |
|
"loss": 0.6853, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 10.08, |
|
"learning_rate": 1.8047996316482134e-05, |
|
"loss": 0.6406, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 10.2, |
|
"learning_rate": 1.8005412409243604e-05, |
|
"loss": 0.6391, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 10.31, |
|
"learning_rate": 1.7962420392925066e-05, |
|
"loss": 0.6524, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 10.42, |
|
"learning_rate": 1.7919022459222754e-05, |
|
"loss": 0.6618, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 10.54, |
|
"learning_rate": 1.787522082052622e-05, |
|
"loss": 0.6604, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 10.65, |
|
"learning_rate": 1.7831017709805555e-05, |
|
"loss": 0.6706, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 10.76, |
|
"learning_rate": 1.778641538049755e-05, |
|
"loss": 0.6589, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 10.88, |
|
"learning_rate": 1.7741416106390828e-05, |
|
"loss": 0.6631, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 10.99, |
|
"learning_rate": 1.7696022181509892e-05, |
|
"loss": 0.6408, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 11.1, |
|
"learning_rate": 1.7650235919998234e-05, |
|
"loss": 0.6077, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 11.22, |
|
"learning_rate": 1.7604059656000313e-05, |
|
"loss": 0.6267, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 11.33, |
|
"learning_rate": 1.7557495743542586e-05, |
|
"loss": 0.6324, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 11.44, |
|
"learning_rate": 1.75105465564135e-05, |
|
"loss": 0.645, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 11.56, |
|
"learning_rate": 1.7463214488042472e-05, |
|
"loss": 0.6167, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 11.67, |
|
"learning_rate": 1.741550195137788e-05, |
|
"loss": 0.6311, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 11.78, |
|
"learning_rate": 1.736741137876405e-05, |
|
"loss": 0.6329, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 11.9, |
|
"learning_rate": 1.7318945221817255e-05, |
|
"loss": 0.6469, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 12.01, |
|
"learning_rate": 1.727010595130074e-05, |
|
"loss": 0.6334, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 12.12, |
|
"learning_rate": 1.7220896056998753e-05, |
|
"loss": 0.5946, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 12.24, |
|
"learning_rate": 1.7171318047589637e-05, |
|
"loss": 0.5695, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 12.35, |
|
"learning_rate": 1.712137445051792e-05, |
|
"loss": 0.6101, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 12.46, |
|
"learning_rate": 1.7071067811865477e-05, |
|
"loss": 0.6203, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 12.58, |
|
"learning_rate": 1.7020400696221737e-05, |
|
"loss": 0.598, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 12.69, |
|
"learning_rate": 1.696937568655294e-05, |
|
"loss": 0.6177, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 12.8, |
|
"learning_rate": 1.691799538407044e-05, |
|
"loss": 0.6153, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 12.92, |
|
"learning_rate": 1.6866262408098134e-05, |
|
"loss": 0.6096, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 13.03, |
|
"learning_rate": 1.6814179395938915e-05, |
|
"loss": 0.612, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 13.14, |
|
"learning_rate": 1.6761749002740195e-05, |
|
"loss": 0.5858, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 13.26, |
|
"learning_rate": 1.6708973901358603e-05, |
|
"loss": 0.5715, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 13.37, |
|
"learning_rate": 1.6655856782223682e-05, |
|
"loss": 0.5834, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 13.48, |
|
"learning_rate": 1.660240035320075e-05, |
|
"loss": 0.5782, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 13.6, |
|
"learning_rate": 1.6548607339452853e-05, |
|
"loss": 0.5621, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 13.71, |
|
"learning_rate": 1.6494480483301836e-05, |
|
"loss": 0.5729, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 13.82, |
|
"learning_rate": 1.6440022544088553e-05, |
|
"loss": 0.5958, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 13.94, |
|
"learning_rate": 1.6385236298032183e-05, |
|
"loss": 0.6007, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 14.05, |
|
"learning_rate": 1.6330124538088705e-05, |
|
"loss": 0.5658, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 14.16, |
|
"learning_rate": 1.627469007380852e-05, |
|
"loss": 0.5674, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 14.28, |
|
"learning_rate": 1.6218935731193223e-05, |
|
"loss": 0.5649, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 14.39, |
|
"learning_rate": 1.616286435255153e-05, |
|
"loss": 0.5497, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 14.5, |
|
"learning_rate": 1.6106478796354382e-05, |
|
"loss": 0.5865, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 14.62, |
|
"learning_rate": 1.6049781937089227e-05, |
|
"loss": 0.547, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 14.73, |
|
"learning_rate": 1.599277666511347e-05, |
|
"loss": 0.5473, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 14.84, |
|
"learning_rate": 1.5935465886507143e-05, |
|
"loss": 0.5553, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 14.96, |
|
"learning_rate": 1.5877852522924733e-05, |
|
"loss": 0.5672, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 15.07, |
|
"learning_rate": 1.581993951144626e-05, |
|
"loss": 0.5392, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 15.18, |
|
"learning_rate": 1.576172980442753e-05, |
|
"loss": 0.504, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 15.3, |
|
"learning_rate": 1.5703226369349642e-05, |
|
"loss": 0.5446, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 15.41, |
|
"learning_rate": 1.5644432188667695e-05, |
|
"loss": 0.5323, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 15.52, |
|
"learning_rate": 1.5585350259658748e-05, |
|
"loss": 0.54, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 15.64, |
|
"learning_rate": 1.5525983594269026e-05, |
|
"loss": 0.5387, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 15.75, |
|
"learning_rate": 1.546633521896038e-05, |
|
"loss": 0.5349, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 15.86, |
|
"learning_rate": 1.5406408174555978e-05, |
|
"loss": 0.5303, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 15.98, |
|
"learning_rate": 1.5346205516085305e-05, |
|
"loss": 0.561, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 16.09, |
|
"learning_rate": 1.528573031262842e-05, |
|
"loss": 0.5295, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 16.2, |
|
"learning_rate": 1.5224985647159489e-05, |
|
"loss": 0.4995, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 16.32, |
|
"learning_rate": 1.5163974616389621e-05, |
|
"loss": 0.5151, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 16.43, |
|
"learning_rate": 1.5102700330609e-05, |
|
"loss": 0.5042, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 16.54, |
|
"learning_rate": 1.504116591352832e-05, |
|
"loss": 0.502, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 16.66, |
|
"learning_rate": 1.497937450211956e-05, |
|
"loss": 0.5101, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 16.77, |
|
"learning_rate": 1.491732924645604e-05, |
|
"loss": 0.5323, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 16.88, |
|
"learning_rate": 1.4855033309551842e-05, |
|
"loss": 0.509, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"learning_rate": 1.479248986720057e-05, |
|
"loss": 0.5292, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 17.11, |
|
"learning_rate": 1.4729702107813438e-05, |
|
"loss": 0.4835, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 17.22, |
|
"learning_rate": 1.4666673232256738e-05, |
|
"loss": 0.4934, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 17.34, |
|
"learning_rate": 1.4603406453688656e-05, |
|
"loss": 0.4811, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 17.45, |
|
"learning_rate": 1.4539904997395468e-05, |
|
"loss": 0.4966, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 17.56, |
|
"learning_rate": 1.4476172100627127e-05, |
|
"loss": 0.4906, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 17.68, |
|
"learning_rate": 1.4412211012432213e-05, |
|
"loss": 0.4956, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 17.79, |
|
"learning_rate": 1.4348024993492323e-05, |
|
"loss": 0.5003, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 17.9, |
|
"learning_rate": 1.4283617315955815e-05, |
|
"loss": 0.5131, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 18.02, |
|
"learning_rate": 1.4218991263271024e-05, |
|
"loss": 0.4642, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 18.13, |
|
"learning_rate": 1.4154150130018867e-05, |
|
"loss": 0.4679, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 18.24, |
|
"learning_rate": 1.408909722174487e-05, |
|
"loss": 0.4555, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 18.36, |
|
"learning_rate": 1.4023835854790682e-05, |
|
"loss": 0.4663, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 18.47, |
|
"learning_rate": 1.3958369356124986e-05, |
|
"loss": 0.4991, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 18.58, |
|
"learning_rate": 1.3892701063173917e-05, |
|
"loss": 0.4592, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 18.7, |
|
"learning_rate": 1.3826834323650899e-05, |
|
"loss": 0.4697, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 18.81, |
|
"learning_rate": 1.3760772495385998e-05, |
|
"loss": 0.4812, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 18.92, |
|
"learning_rate": 1.369451894615474e-05, |
|
"loss": 0.4868, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 19.04, |
|
"learning_rate": 1.362807705350641e-05, |
|
"loss": 0.4511, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 19.15, |
|
"learning_rate": 1.3561450204591898e-05, |
|
"loss": 0.4332, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 19.26, |
|
"learning_rate": 1.3494641795990986e-05, |
|
"loss": 0.4392, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 19.38, |
|
"learning_rate": 1.3427655233539227e-05, |
|
"loss": 0.435, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 19.49, |
|
"learning_rate": 1.3360493932154301e-05, |
|
"loss": 0.4377, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 19.6, |
|
"learning_rate": 1.3293161315661934e-05, |
|
"loss": 0.4509, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 19.72, |
|
"learning_rate": 1.3225660816621342e-05, |
|
"loss": 0.4564, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 19.83, |
|
"learning_rate": 1.3157995876150252e-05, |
|
"loss": 0.471, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 19.94, |
|
"learning_rate": 1.3090169943749475e-05, |
|
"loss": 0.4759, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 20.06, |
|
"learning_rate": 1.302218647712706e-05, |
|
"loss": 0.454, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 20.17, |
|
"learning_rate": 1.2954048942022002e-05, |
|
"loss": 0.3999, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 20.28, |
|
"learning_rate": 1.288576081202759e-05, |
|
"loss": 0.415, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 20.4, |
|
"learning_rate": 1.2817325568414299e-05, |
|
"loss": 0.4349, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 20.51, |
|
"learning_rate": 1.2748746699952338e-05, |
|
"loss": 0.4281, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 20.62, |
|
"learning_rate": 1.2680027702733791e-05, |
|
"loss": 0.4391, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 20.74, |
|
"learning_rate": 1.2611172079994377e-05, |
|
"loss": 0.4288, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 20.85, |
|
"learning_rate": 1.2542183341934873e-05, |
|
"loss": 0.4562, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 20.96, |
|
"learning_rate": 1.2473065005542155e-05, |
|
"loss": 0.4294, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 21.08, |
|
"learning_rate": 1.2403820594409926e-05, |
|
"loss": 0.4147, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 21.19, |
|
"learning_rate": 1.2334453638559057e-05, |
|
"loss": 0.4003, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 21.3, |
|
"learning_rate": 1.2264967674257647e-05, |
|
"loss": 0.402, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 21.42, |
|
"learning_rate": 1.2195366243840745e-05, |
|
"loss": 0.3808, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 21.53, |
|
"learning_rate": 1.2125652895529766e-05, |
|
"loss": 0.4199, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 21.64, |
|
"learning_rate": 1.2055831183251608e-05, |
|
"loss": 0.4153, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 21.76, |
|
"learning_rate": 1.1985904666457455e-05, |
|
"loss": 0.4206, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 21.87, |
|
"learning_rate": 1.1915876909941356e-05, |
|
"loss": 0.4326, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 21.98, |
|
"learning_rate": 1.1845751483658454e-05, |
|
"loss": 0.4263, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 22.1, |
|
"learning_rate": 1.1775531962543036e-05, |
|
"loss": 0.3938, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 22.21, |
|
"learning_rate": 1.170522192632624e-05, |
|
"loss": 0.3748, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 22.32, |
|
"learning_rate": 1.1634824959353602e-05, |
|
"loss": 0.3757, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 22.44, |
|
"learning_rate": 1.156434465040231e-05, |
|
"loss": 0.3876, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 22.55, |
|
"learning_rate": 1.1493784592498252e-05, |
|
"loss": 0.395, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 22.66, |
|
"learning_rate": 1.1423148382732854e-05, |
|
"loss": 0.3938, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 22.78, |
|
"learning_rate": 1.1352439622079689e-05, |
|
"loss": 0.4146, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 22.89, |
|
"learning_rate": 1.1281661915210931e-05, |
|
"loss": 0.4206, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"learning_rate": 1.1210818870313548e-05, |
|
"loss": 0.3975, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 23.12, |
|
"learning_rate": 1.1139914098905406e-05, |
|
"loss": 0.3613, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 23.23, |
|
"learning_rate": 1.1068951215651132e-05, |
|
"loss": 0.3572, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 23.34, |
|
"learning_rate": 1.0997933838177828e-05, |
|
"loss": 0.3773, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 23.46, |
|
"learning_rate": 1.0926865586890689e-05, |
|
"loss": 0.3846, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 23.57, |
|
"learning_rate": 1.08557500847884e-05, |
|
"loss": 0.3758, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 23.68, |
|
"learning_rate": 1.0784590957278452e-05, |
|
"loss": 0.393, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 23.8, |
|
"learning_rate": 1.0713391831992324e-05, |
|
"loss": 0.3699, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 23.91, |
|
"learning_rate": 1.064215633860055e-05, |
|
"loss": 0.3639, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 24.02, |
|
"learning_rate": 1.0570888108627682e-05, |
|
"loss": 0.3869, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 24.14, |
|
"learning_rate": 1.0499590775267142e-05, |
|
"loss": 0.3387, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 24.25, |
|
"learning_rate": 1.0428267973196027e-05, |
|
"loss": 0.3507, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 24.36, |
|
"learning_rate": 1.0356923338389807e-05, |
|
"loss": 0.3424, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 24.48, |
|
"learning_rate": 1.0285560507936962e-05, |
|
"loss": 0.3615, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 24.59, |
|
"learning_rate": 1.0214183119853583e-05, |
|
"loss": 0.3585, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 24.7, |
|
"learning_rate": 1.0142794812897874e-05, |
|
"loss": 0.3519, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 24.82, |
|
"learning_rate": 1.0071399226384695e-05, |
|
"loss": 0.3734, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 24.93, |
|
"learning_rate": 1e-05, |
|
"loss": 0.3788, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 25.04, |
|
"learning_rate": 9.928600773615306e-06, |
|
"loss": 0.3616, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 25.16, |
|
"learning_rate": 9.85720518710213e-06, |
|
"loss": 0.3345, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 25.27, |
|
"learning_rate": 9.78581688014642e-06, |
|
"loss": 0.3483, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 25.38, |
|
"learning_rate": 9.71443949206304e-06, |
|
"loss": 0.327, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 25.5, |
|
"learning_rate": 9.643076661610197e-06, |
|
"loss": 0.3422, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 25.61, |
|
"learning_rate": 9.571732026803978e-06, |
|
"loss": 0.3472, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 25.72, |
|
"learning_rate": 9.500409224732863e-06, |
|
"loss": 0.3475, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 25.84, |
|
"learning_rate": 9.42911189137232e-06, |
|
"loss": 0.3507, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 25.95, |
|
"learning_rate": 9.357843661399447e-06, |
|
"loss": 0.3534, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 26.06, |
|
"learning_rate": 9.286608168007678e-06, |
|
"loss": 0.3295, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 26.18, |
|
"learning_rate": 9.215409042721553e-06, |
|
"loss": 0.3187, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 26.29, |
|
"learning_rate": 9.144249915211605e-06, |
|
"loss": 0.3465, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 26.4, |
|
"learning_rate": 9.073134413109313e-06, |
|
"loss": 0.3262, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 26.52, |
|
"learning_rate": 9.002066161822174e-06, |
|
"loss": 0.3154, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 26.63, |
|
"learning_rate": 8.931048784348875e-06, |
|
"loss": 0.3277, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 26.74, |
|
"learning_rate": 8.860085901094595e-06, |
|
"loss": 0.3387, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 26.86, |
|
"learning_rate": 8.789181129686452e-06, |
|
"loss": 0.323, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 26.97, |
|
"learning_rate": 8.718338084789074e-06, |
|
"loss": 0.3243, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 27.08, |
|
"learning_rate": 8.647560377920311e-06, |
|
"loss": 0.3112, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 27.2, |
|
"learning_rate": 8.576851617267151e-06, |
|
"loss": 0.3138, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 27.31, |
|
"learning_rate": 8.50621540750175e-06, |
|
"loss": 0.294, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 27.42, |
|
"learning_rate": 8.43565534959769e-06, |
|
"loss": 0.3009, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 27.54, |
|
"learning_rate": 8.365175040646403e-06, |
|
"loss": 0.3217, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 27.65, |
|
"learning_rate": 8.294778073673762e-06, |
|
"loss": 0.3083, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 27.76, |
|
"learning_rate": 8.224468037456969e-06, |
|
"loss": 0.3201, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 27.88, |
|
"learning_rate": 8.154248516341547e-06, |
|
"loss": 0.3402, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 27.99, |
|
"learning_rate": 8.084123090058646e-06, |
|
"loss": 0.3128, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 28.1, |
|
"learning_rate": 8.014095333542548e-06, |
|
"loss": 0.2901, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 28.22, |
|
"learning_rate": 7.944168816748396e-06, |
|
"loss": 0.2901, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 28.33, |
|
"learning_rate": 7.874347104470234e-06, |
|
"loss": 0.2886, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 28.44, |
|
"learning_rate": 7.804633756159258e-06, |
|
"loss": 0.2953, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 28.56, |
|
"learning_rate": 7.735032325742355e-06, |
|
"loss": 0.3088, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 28.67, |
|
"learning_rate": 7.66554636144095e-06, |
|
"loss": 0.3004, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 28.78, |
|
"learning_rate": 7.596179405590076e-06, |
|
"loss": 0.3299, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 28.9, |
|
"learning_rate": 7.5269349944578454e-06, |
|
"loss": 0.294, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 29.01, |
|
"learning_rate": 7.4578166580651335e-06, |
|
"loss": 0.2974, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 29.12, |
|
"learning_rate": 7.388827920005628e-06, |
|
"loss": 0.2773, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 29.24, |
|
"learning_rate": 7.319972297266215e-06, |
|
"loss": 0.2718, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 29.35, |
|
"learning_rate": 7.2512533000476625e-06, |
|
"loss": 0.2964, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 29.46, |
|
"learning_rate": 7.182674431585703e-06, |
|
"loss": 0.2784, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 29.58, |
|
"learning_rate": 7.114239187972416e-06, |
|
"loss": 0.3001, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 29.69, |
|
"learning_rate": 7.045951057978001e-06, |
|
"loss": 0.2877, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 29.8, |
|
"learning_rate": 6.977813522872943e-06, |
|
"loss": 0.2964, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 29.92, |
|
"learning_rate": 6.909830056250527e-06, |
|
"loss": 0.2923, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 30.03, |
|
"learning_rate": 6.8420041238497525e-06, |
|
"loss": 0.2839, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 30.14, |
|
"learning_rate": 6.774339183378663e-06, |
|
"loss": 0.2679, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 30.25, |
|
"learning_rate": 6.7068386843380695e-06, |
|
"loss": 0.2751, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 30.37, |
|
"learning_rate": 6.639506067845698e-06, |
|
"loss": 0.2588, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 30.48, |
|
"learning_rate": 6.572344766460776e-06, |
|
"loss": 0.2828, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 30.59, |
|
"learning_rate": 6.505358204009018e-06, |
|
"loss": 0.2904, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 30.71, |
|
"learning_rate": 6.438549795408107e-06, |
|
"loss": 0.2712, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 30.82, |
|
"learning_rate": 6.3719229464935915e-06, |
|
"loss": 0.2765, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 30.93, |
|
"learning_rate": 6.305481053845262e-06, |
|
"loss": 0.2732, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 31.05, |
|
"learning_rate": 6.239227504614004e-06, |
|
"loss": 0.2604, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 31.16, |
|
"learning_rate": 6.173165676349103e-06, |
|
"loss": 0.2535, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 31.27, |
|
"learning_rate": 6.107298936826086e-06, |
|
"loss": 0.2607, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 31.39, |
|
"learning_rate": 6.041630643875018e-06, |
|
"loss": 0.2769, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 31.5, |
|
"learning_rate": 5.9761641452093225e-06, |
|
"loss": 0.2747, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 31.61, |
|
"learning_rate": 5.910902778255134e-06, |
|
"loss": 0.2578, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 31.73, |
|
"learning_rate": 5.845849869981137e-06, |
|
"loss": 0.2566, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 31.84, |
|
"learning_rate": 5.781008736728975e-06, |
|
"loss": 0.2658, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 31.95, |
|
"learning_rate": 5.716382684044191e-06, |
|
"loss": 0.2732, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 32.07, |
|
"learning_rate": 5.6519750065076815e-06, |
|
"loss": 0.2656, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 32.18, |
|
"learning_rate": 5.587788987567785e-06, |
|
"loss": 0.2386, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 32.29, |
|
"learning_rate": 5.523827899372876e-06, |
|
"loss": 0.2554, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 32.41, |
|
"learning_rate": 5.460095002604533e-06, |
|
"loss": 0.2511, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 32.52, |
|
"learning_rate": 5.396593546311346e-06, |
|
"loss": 0.2641, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 32.63, |
|
"learning_rate": 5.333326767743263e-06, |
|
"loss": 0.2553, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 32.75, |
|
"learning_rate": 5.276589982435913e-06, |
|
"loss": 0.2508, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 32.86, |
|
"learning_rate": 5.213777967188747e-06, |
|
"loss": 0.2533, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 32.97, |
|
"learning_rate": 5.151209949448599e-06, |
|
"loss": 0.2621, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 33.09, |
|
"learning_rate": 5.088889118879516e-06, |
|
"loss": 0.2381, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 33.2, |
|
"learning_rate": 5.02681865254417e-06, |
|
"loss": 0.2405, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 33.31, |
|
"learning_rate": 4.965001714741851e-06, |
|
"loss": 0.2473, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 33.43, |
|
"learning_rate": 4.903441456847198e-06, |
|
"loss": 0.2345, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 33.54, |
|
"learning_rate": 4.8421410171495265e-06, |
|
"loss": 0.2349, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 33.65, |
|
"learning_rate": 4.781103520692839e-06, |
|
"loss": 0.2466, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 33.77, |
|
"learning_rate": 4.720332079116523e-06, |
|
"loss": 0.2512, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 33.88, |
|
"learning_rate": 4.659829790496699e-06, |
|
"loss": 0.2633, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 33.99, |
|
"learning_rate": 4.599599739188322e-06, |
|
"loss": 0.247, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 34.11, |
|
"learning_rate": 4.539644995667911e-06, |
|
"loss": 0.2231, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 34.22, |
|
"learning_rate": 4.479968616377024e-06, |
|
"loss": 0.2174, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 34.33, |
|
"learning_rate": 4.420573643566455e-06, |
|
"loss": 0.231, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 34.45, |
|
"learning_rate": 4.361463105141137e-06, |
|
"loss": 0.2545, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 34.56, |
|
"learning_rate": 4.302640014505779e-06, |
|
"loss": 0.2502, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 34.67, |
|
"learning_rate": 4.244107370411248e-06, |
|
"loss": 0.2505, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 34.79, |
|
"learning_rate": 4.185868156801695e-06, |
|
"loss": 0.225, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 34.9, |
|
"learning_rate": 4.1279253426624345e-06, |
|
"loss": 0.2436, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 35.01, |
|
"learning_rate": 4.07028188186859e-06, |
|
"loss": 0.2274, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 35.13, |
|
"learning_rate": 4.0129407130345114e-06, |
|
"loss": 0.214, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 35.24, |
|
"learning_rate": 3.955904759363958e-06, |
|
"loss": 0.2088, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 35.35, |
|
"learning_rate": 3.899176928501078e-06, |
|
"loss": 0.244, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 35.47, |
|
"learning_rate": 3.842760112382183e-06, |
|
"loss": 0.2255, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 35.58, |
|
"learning_rate": 3.7866571870883382e-06, |
|
"loss": 0.2282, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 35.69, |
|
"learning_rate": 3.7308710126986934e-06, |
|
"loss": 0.2415, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 35.81, |
|
"learning_rate": 3.675404433144727e-06, |
|
"loss": 0.2415, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 35.92, |
|
"learning_rate": 3.6202602760652395e-06, |
|
"loss": 0.2301, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 36.03, |
|
"learning_rate": 3.565441352662211e-06, |
|
"loss": 0.2301, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 36.15, |
|
"learning_rate": 3.510950457557487e-06, |
|
"loss": 0.2213, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 36.26, |
|
"learning_rate": 3.4567903686503103e-06, |
|
"loss": 0.2174, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 36.37, |
|
"learning_rate": 3.4029638469757055e-06, |
|
"loss": 0.2236, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 36.49, |
|
"learning_rate": 3.3494736365637304e-06, |
|
"loss": 0.2235, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 36.6, |
|
"learning_rate": 3.2963224642995675e-06, |
|
"loss": 0.222, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 36.71, |
|
"learning_rate": 3.2435130397845472e-06, |
|
"loss": 0.2237, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 36.83, |
|
"learning_rate": 3.1910480551979706e-06, |
|
"loss": 0.2239, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 36.94, |
|
"learning_rate": 3.1389301851598976e-06, |
|
"loss": 0.2149, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 37.05, |
|
"learning_rate": 3.0871620865947816e-06, |
|
"loss": 0.2279, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 37.17, |
|
"learning_rate": 3.0357463985960257e-06, |
|
"loss": 0.2007, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 37.28, |
|
"learning_rate": 2.9846857422914434e-06, |
|
"loss": 0.2132, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 37.39, |
|
"learning_rate": 2.933982720709637e-06, |
|
"loss": 0.2147, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 37.51, |
|
"learning_rate": 2.883639918647296e-06, |
|
"loss": 0.2121, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 37.62, |
|
"learning_rate": 2.833659902537429e-06, |
|
"loss": 0.2147, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 37.73, |
|
"learning_rate": 2.7840452203185154e-06, |
|
"loss": 0.2127, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 37.85, |
|
"learning_rate": 2.7347984013046435e-06, |
|
"loss": 0.2308, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 37.96, |
|
"learning_rate": 2.6859219560565407e-06, |
|
"loss": 0.2152, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 38.07, |
|
"learning_rate": 2.637418376253591e-06, |
|
"loss": 0.2161, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 38.19, |
|
"learning_rate": 2.589290134566824e-06, |
|
"loss": 0.2206, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 38.3, |
|
"learning_rate": 2.541539684532852e-06, |
|
"loss": 0.2256, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 38.41, |
|
"learning_rate": 2.4941694604287913e-06, |
|
"loss": 0.2096, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 38.53, |
|
"learning_rate": 2.447181877148165e-06, |
|
"loss": 0.2058, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 38.64, |
|
"learning_rate": 2.4005793300777933e-06, |
|
"loss": 0.2102, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 38.75, |
|
"learning_rate": 2.3543641949756835e-06, |
|
"loss": 0.2139, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 38.87, |
|
"learning_rate": 2.30853882784991e-06, |
|
"loss": 0.2048, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 38.98, |
|
"learning_rate": 2.2631055648385e-06, |
|
"loss": 0.2071, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 39.09, |
|
"learning_rate": 2.2180667220903697e-06, |
|
"loss": 0.206, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 39.21, |
|
"learning_rate": 2.1734245956472024e-06, |
|
"loss": 0.2049, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 39.32, |
|
"learning_rate": 2.1291814613264383e-06, |
|
"loss": 0.2096, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 39.43, |
|
"learning_rate": 2.085339574605243e-06, |
|
"loss": 0.1986, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 39.55, |
|
"learning_rate": 2.0419011705055115e-06, |
|
"loss": 0.2036, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 39.66, |
|
"learning_rate": 1.998868463479945e-06, |
|
"loss": 0.21, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 39.77, |
|
"learning_rate": 1.956243647299155e-06, |
|
"loss": 0.2155, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 39.89, |
|
"learning_rate": 1.914028894939829e-06, |
|
"loss": 0.2086, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 1.8722263584739486e-06, |
|
"loss": 0.1997, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 40.11, |
|
"learning_rate": 1.830838168959075e-06, |
|
"loss": 0.1932, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 40.23, |
|
"learning_rate": 1.7898664363297302e-06, |
|
"loss": 0.1997, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 40.34, |
|
"learning_rate": 1.7493132492898134e-06, |
|
"loss": 0.1975, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 40.45, |
|
"learning_rate": 1.7091806752061212e-06, |
|
"loss": 0.2072, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 40.57, |
|
"learning_rate": 1.6694707600029702e-06, |
|
"loss": 0.1786, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 40.68, |
|
"learning_rate": 1.630185528057886e-06, |
|
"loss": 0.2044, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 40.79, |
|
"learning_rate": 1.5913269820984023e-06, |
|
"loss": 0.2129, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 40.91, |
|
"learning_rate": 1.5528971030999706e-06, |
|
"loss": 0.2219, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 41.02, |
|
"learning_rate": 1.5148978501849642e-06, |
|
"loss": 0.2023, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 41.13, |
|
"learning_rate": 1.4773311605228059e-06, |
|
"loss": 0.1975, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 41.25, |
|
"learning_rate": 1.4401989492312164e-06, |
|
"loss": 0.1922, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 41.36, |
|
"learning_rate": 1.403503109278579e-06, |
|
"loss": 0.1868, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 41.47, |
|
"learning_rate": 1.3672455113874429e-06, |
|
"loss": 0.2009, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 41.59, |
|
"learning_rate": 1.3314280039391426e-06, |
|
"loss": 0.2064, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 41.7, |
|
"learning_rate": 1.2960524128795837e-06, |
|
"loss": 0.2036, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 41.81, |
|
"learning_rate": 1.2611205416261595e-06, |
|
"loss": 0.1896, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 41.93, |
|
"learning_rate": 1.2266341709757946e-06, |
|
"loss": 0.2078, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 42.04, |
|
"learning_rate": 1.192595059014179e-06, |
|
"loss": 0.1931, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 42.15, |
|
"learning_rate": 1.1590049410261384e-06, |
|
"loss": 0.203, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 42.27, |
|
"learning_rate": 1.1258655294071686e-06, |
|
"loss": 0.1928, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 42.38, |
|
"learning_rate": 1.0931785135761375e-06, |
|
"loss": 0.1898, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 42.49, |
|
"learning_rate": 1.0609455598891682e-06, |
|
"loss": 0.2001, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 42.61, |
|
"learning_rate": 1.029168311554678e-06, |
|
"loss": 0.2011, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 42.72, |
|
"learning_rate": 9.978483885496204e-07, |
|
"loss": 0.1871, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 42.83, |
|
"learning_rate": 9.669873875368852e-07, |
|
"loss": 0.2027, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 42.95, |
|
"learning_rate": 9.365868817839264e-07, |
|
"loss": 0.1941, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 43.06, |
|
"learning_rate": 9.066484210825288e-07, |
|
"loss": 0.1917, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 43.17, |
|
"learning_rate": 8.771735316698249e-07, |
|
"loss": 0.192, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 43.29, |
|
"learning_rate": 8.481637161504741e-07, |
|
"loss": 0.1959, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 43.4, |
|
"learning_rate": 8.19620453420068e-07, |
|
"loss": 0.192, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 43.51, |
|
"learning_rate": 7.915451985897382e-07, |
|
"loss": 0.2027, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 43.63, |
|
"learning_rate": 7.639393829119701e-07, |
|
"loss": 0.19, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 43.74, |
|
"learning_rate": 7.368044137076435e-07, |
|
"loss": 0.1919, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 43.85, |
|
"learning_rate": 7.101416742942913e-07, |
|
"loss": 0.1897, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 43.97, |
|
"learning_rate": 6.839525239155675e-07, |
|
"loss": 0.199, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 44.08, |
|
"learning_rate": 6.582382976719703e-07, |
|
"loss": 0.2, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 44.19, |
|
"learning_rate": 6.330003064527679e-07, |
|
"loss": 0.1917, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 44.31, |
|
"learning_rate": 6.082398368691711e-07, |
|
"loss": 0.1804, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 44.42, |
|
"learning_rate": 5.839581511887515e-07, |
|
"loss": 0.1971, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 44.53, |
|
"learning_rate": 5.601564872710851e-07, |
|
"loss": 0.1895, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 44.65, |
|
"learning_rate": 5.36836058504645e-07, |
|
"loss": 0.2008, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 44.76, |
|
"learning_rate": 5.13998053744954e-07, |
|
"loss": 0.1871, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 44.87, |
|
"learning_rate": 4.916436372539668e-07, |
|
"loss": 0.1899, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 44.99, |
|
"learning_rate": 4.697739486407227e-07, |
|
"loss": 0.1917, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 45.1, |
|
"learning_rate": 4.4839010280325003e-07, |
|
"loss": 0.1976, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 45.21, |
|
"learning_rate": 4.2749318987172385e-07, |
|
"loss": 0.1915, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 45.33, |
|
"learning_rate": 4.070842751529025e-07, |
|
"loss": 0.2004, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 45.44, |
|
"learning_rate": 3.871643990758056e-07, |
|
"loss": 0.1947, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 45.55, |
|
"learning_rate": 3.6773457713868423e-07, |
|
"loss": 0.1793, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 45.67, |
|
"learning_rate": 3.487957998572511e-07, |
|
"loss": 0.1882, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 45.78, |
|
"learning_rate": 3.3034903271417564e-07, |
|
"loss": 0.1862, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 45.89, |
|
"learning_rate": 3.1239521610987757e-07, |
|
"loss": 0.1862, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 46.01, |
|
"learning_rate": 2.949352653145754e-07, |
|
"loss": 0.1987, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 46.12, |
|
"learning_rate": 2.779700704216337e-07, |
|
"loss": 0.1837, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 46.23, |
|
"learning_rate": 2.6150049630218277e-07, |
|
"loss": 0.1949, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 46.35, |
|
"learning_rate": 2.4552738256102717e-07, |
|
"loss": 0.1892, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 46.46, |
|
"learning_rate": 2.3005154349385106e-07, |
|
"loss": 0.1959, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 46.57, |
|
"learning_rate": 2.1507376804569935e-07, |
|
"loss": 0.1925, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 46.69, |
|
"learning_rate": 2.0059481977075523e-07, |
|
"loss": 0.1933, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 46.8, |
|
"learning_rate": 1.8661543679342365e-07, |
|
"loss": 0.2049, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 46.91, |
|
"learning_rate": 1.731363317706969e-07, |
|
"loss": 0.1756, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 47.03, |
|
"learning_rate": 1.601581918558237e-07, |
|
"loss": 0.1746, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 47.14, |
|
"learning_rate": 1.4768167866328176e-07, |
|
"loss": 0.1963, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 47.25, |
|
"learning_rate": 1.357074282350457e-07, |
|
"loss": 0.1903, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 47.37, |
|
"learning_rate": 1.2423605100816304e-07, |
|
"loss": 0.1821, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 47.48, |
|
"learning_rate": 1.1326813178363927e-07, |
|
"loss": 0.1946, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 47.59, |
|
"learning_rate": 1.0280422969661696e-07, |
|
"loss": 0.1832, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 47.71, |
|
"learning_rate": 9.284487818787879e-08, |
|
"loss": 0.1892, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 47.82, |
|
"learning_rate": 8.33905849766481e-08, |
|
"loss": 0.1883, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 47.93, |
|
"learning_rate": 7.444183203471067e-08, |
|
"loss": 0.1921, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 48.05, |
|
"learning_rate": 6.599907556184115e-08, |
|
"loss": 0.2003, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 48.16, |
|
"learning_rate": 5.806274596254491e-08, |
|
"loss": 0.1789, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 48.27, |
|
"learning_rate": 5.0633247824118936e-08, |
|
"loss": 0.1853, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 48.39, |
|
"learning_rate": 4.371095989602614e-08, |
|
"loss": 0.1968, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 48.5, |
|
"learning_rate": 3.729623507058744e-08, |
|
"loss": 0.2019, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 48.61, |
|
"learning_rate": 3.13894003649895e-08, |
|
"loss": 0.1808, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 48.73, |
|
"learning_rate": 2.5990756904614723e-08, |
|
"loss": 0.1959, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 48.84, |
|
"learning_rate": 2.1100579907691322e-08, |
|
"loss": 0.1781, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 48.95, |
|
"learning_rate": 1.6719118671262302e-08, |
|
"loss": 0.1868, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 49.07, |
|
"learning_rate": 1.2846596558473424e-08, |
|
"loss": 0.1887, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 49.18, |
|
"learning_rate": 9.483210987193404e-09, |
|
"loss": 0.1982, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 49.29, |
|
"learning_rate": 6.62913341994087e-09, |
|
"loss": 0.1957, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 49.41, |
|
"learning_rate": 4.284509355151345e-09, |
|
"loss": 0.1925, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 49.52, |
|
"learning_rate": 2.449458319754294e-09, |
|
"loss": 0.1859, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 49.63, |
|
"learning_rate": 1.124073863082442e-09, |
|
"loss": 0.187, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 49.75, |
|
"learning_rate": 3.0842355210336515e-10, |
|
"loss": 0.1792, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 49.86, |
|
"learning_rate": 2.548967970028571e-12, |
|
"loss": 0.1895, |
|
"step": 4400 |
|
} |
|
], |
|
"max_steps": 4400, |
|
"num_train_epochs": 50, |
|
"total_flos": 6.496431653421466e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |