{ "best_metric": null, "best_model_checkpoint": null, "epoch": 49.858356940509914, "global_step": 4400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.11, "learning_rate": 1.9999745104274995e-05, "loss": 1.1665, "step": 10 }, { "epoch": 0.23, "learning_rate": 1.9998980430094333e-05, "loss": 1.1142, "step": 20 }, { "epoch": 0.34, "learning_rate": 1.9997706016440462e-05, "loss": 1.0657, "step": 30 }, { "epoch": 0.45, "learning_rate": 1.9995921928281893e-05, "loss": 1.0229, "step": 40 }, { "epoch": 0.57, "learning_rate": 1.999362825656992e-05, "loss": 1.0155, "step": 50 }, { "epoch": 0.68, "learning_rate": 1.9990825118233958e-05, "loss": 0.9919, "step": 60 }, { "epoch": 0.79, "learning_rate": 1.9987512656175612e-05, "loss": 0.9623, "step": 70 }, { "epoch": 0.91, "learning_rate": 1.9983691039261358e-05, "loss": 0.937, "step": 80 }, { "epoch": 1.02, "learning_rate": 1.9979360462313965e-05, "loss": 0.9527, "step": 90 }, { "epoch": 1.13, "learning_rate": 1.9974521146102535e-05, "loss": 0.9173, "step": 100 }, { "epoch": 1.25, "learning_rate": 1.9969173337331283e-05, "loss": 0.91, "step": 110 }, { "epoch": 1.36, "learning_rate": 1.9963317308626916e-05, "loss": 0.9045, "step": 120 }, { "epoch": 1.47, "learning_rate": 1.9956953358524774e-05, "loss": 0.94, "step": 130 }, { "epoch": 1.59, "learning_rate": 1.9950081811453598e-05, "loss": 0.9086, "step": 140 }, { "epoch": 1.7, "learning_rate": 1.9942703017718977e-05, "loss": 0.8946, "step": 150 }, { "epoch": 1.81, "learning_rate": 1.99348173534855e-05, "loss": 0.8836, "step": 160 }, { "epoch": 1.93, "learning_rate": 1.9926425220757607e-05, "loss": 0.9001, "step": 170 }, { "epoch": 2.04, "learning_rate": 1.991752704735903e-05, "loss": 0.8826, "step": 180 }, { "epoch": 2.15, "learning_rate": 1.990812328691105e-05, "loss": 0.8549, "step": 190 }, { "epoch": 2.27, "learning_rate": 1.989821441880933e-05, "loss": 0.869, "step": 200 }, { "epoch": 2.38, "learning_rate": 1.9887800948199496e-05, "loss": 0.8772, "step": 210 }, { "epoch": 2.49, "learning_rate": 1.9876883405951378e-05, "loss": 0.8691, "step": 220 }, { "epoch": 2.61, "learning_rate": 1.9865462348631945e-05, "loss": 0.8978, "step": 230 }, { "epoch": 2.72, "learning_rate": 1.9853538358476933e-05, "loss": 0.8576, "step": 240 }, { "epoch": 2.83, "learning_rate": 1.984111204336116e-05, "loss": 0.8506, "step": 250 }, { "epoch": 2.95, "learning_rate": 1.9828184036767556e-05, "loss": 0.8679, "step": 260 }, { "epoch": 3.06, "learning_rate": 1.981475499775484e-05, "loss": 0.888, "step": 270 }, { "epoch": 3.17, "learning_rate": 1.9800825610923937e-05, "loss": 0.8369, "step": 280 }, { "epoch": 3.29, "learning_rate": 1.9786396586383078e-05, "loss": 0.8244, "step": 290 }, { "epoch": 3.4, "learning_rate": 1.9771468659711595e-05, "loss": 0.8495, "step": 300 }, { "epoch": 3.51, "learning_rate": 1.9756042591922436e-05, "loss": 0.85, "step": 310 }, { "epoch": 3.63, "learning_rate": 1.9740119169423337e-05, "loss": 0.8307, "step": 320 }, { "epoch": 3.74, "learning_rate": 1.9723699203976768e-05, "loss": 0.8419, "step": 330 }, { "epoch": 3.85, "learning_rate": 1.9706783532658528e-05, "loss": 0.8483, "step": 340 }, { "epoch": 3.97, "learning_rate": 1.9689373017815076e-05, "loss": 0.8181, "step": 350 }, { "epoch": 4.08, "learning_rate": 1.9671468547019575e-05, "loss": 0.8203, "step": 360 }, { "epoch": 4.19, "learning_rate": 1.9653071033026635e-05, "loss": 0.8132, "step": 370 }, { "epoch": 4.31, "learning_rate": 1.963418141372579e-05, "loss": 0.8299, "step": 380 }, { "epoch": 4.42, "learning_rate": 1.9614800652093685e-05, "loss": 0.8228, "step": 390 }, { "epoch": 4.53, "learning_rate": 1.9594929736144978e-05, "loss": 0.8083, "step": 400 }, { "epoch": 4.65, "learning_rate": 1.9574569678881965e-05, "loss": 0.8054, "step": 410 }, { "epoch": 4.76, "learning_rate": 1.955372151824297e-05, "loss": 0.8127, "step": 420 }, { "epoch": 4.87, "learning_rate": 1.9532386317049387e-05, "loss": 0.8047, "step": 430 }, { "epoch": 4.99, "learning_rate": 1.9510565162951538e-05, "loss": 0.8113, "step": 440 }, { "epoch": 5.1, "learning_rate": 1.9488259168373198e-05, "loss": 0.7806, "step": 450 }, { "epoch": 5.21, "learning_rate": 1.94654694704549e-05, "loss": 0.7973, "step": 460 }, { "epoch": 5.33, "learning_rate": 1.944219723099596e-05, "loss": 0.7968, "step": 470 }, { "epoch": 5.44, "learning_rate": 1.941844363639525e-05, "loss": 0.7838, "step": 480 }, { "epoch": 5.55, "learning_rate": 1.9394209897590707e-05, "loss": 0.7798, "step": 490 }, { "epoch": 5.67, "learning_rate": 1.936949724999762e-05, "loss": 0.7791, "step": 500 }, { "epoch": 5.78, "learning_rate": 1.9344306953445632e-05, "loss": 0.7632, "step": 510 }, { "epoch": 5.89, "learning_rate": 1.9318640292114526e-05, "loss": 0.7984, "step": 520 }, { "epoch": 6.01, "learning_rate": 1.929249857446875e-05, "loss": 0.7763, "step": 530 }, { "epoch": 6.12, "learning_rate": 1.9265883133190715e-05, "loss": 0.7489, "step": 540 }, { "epoch": 6.23, "learning_rate": 1.9238795325112867e-05, "loss": 0.7739, "step": 550 }, { "epoch": 6.35, "learning_rate": 1.92112365311485e-05, "loss": 0.7664, "step": 560 }, { "epoch": 6.46, "learning_rate": 1.918320815622137e-05, "loss": 0.76, "step": 570 }, { "epoch": 6.57, "learning_rate": 1.9154711629194062e-05, "loss": 0.7604, "step": 580 }, { "epoch": 6.69, "learning_rate": 1.912574840279516e-05, "loss": 0.7598, "step": 590 }, { "epoch": 6.8, "learning_rate": 1.9096319953545186e-05, "loss": 0.7585, "step": 600 }, { "epoch": 6.91, "learning_rate": 1.9066427781681314e-05, "loss": 0.7312, "step": 610 }, { "epoch": 7.03, "learning_rate": 1.9036073411080917e-05, "loss": 0.732, "step": 620 }, { "epoch": 7.14, "learning_rate": 1.900525838918385e-05, "loss": 0.7392, "step": 630 }, { "epoch": 7.25, "learning_rate": 1.8973984286913584e-05, "loss": 0.6986, "step": 640 }, { "epoch": 7.37, "learning_rate": 1.8942252698597113e-05, "loss": 0.7281, "step": 650 }, { "epoch": 7.48, "learning_rate": 1.891006524188368e-05, "loss": 0.7316, "step": 660 }, { "epoch": 7.59, "learning_rate": 1.8877423557662307e-05, "loss": 0.7329, "step": 670 }, { "epoch": 7.71, "learning_rate": 1.8844329309978146e-05, "loss": 0.7409, "step": 680 }, { "epoch": 7.82, "learning_rate": 1.8810784185947648e-05, "loss": 0.7345, "step": 690 }, { "epoch": 7.93, "learning_rate": 1.8776789895672557e-05, "loss": 0.7418, "step": 700 }, { "epoch": 8.05, "learning_rate": 1.8742348172152728e-05, "loss": 0.7426, "step": 710 }, { "epoch": 8.16, "learning_rate": 1.8707460771197773e-05, "loss": 0.6952, "step": 720 }, { "epoch": 8.27, "learning_rate": 1.8672129471337568e-05, "loss": 0.7136, "step": 730 }, { "epoch": 8.39, "learning_rate": 1.863635607373157e-05, "loss": 0.7284, "step": 740 }, { "epoch": 8.5, "learning_rate": 1.8600142402077006e-05, "loss": 0.6978, "step": 750 }, { "epoch": 8.61, "learning_rate": 1.856349030251589e-05, "loss": 0.6843, "step": 760 }, { "epoch": 8.73, "learning_rate": 1.8526401643540924e-05, "loss": 0.6895, "step": 770 }, { "epoch": 8.84, "learning_rate": 1.8488878315900228e-05, "loss": 0.7084, "step": 780 }, { "epoch": 8.95, "learning_rate": 1.8450922232500966e-05, "loss": 0.7028, "step": 790 }, { "epoch": 9.07, "learning_rate": 1.8412535328311813e-05, "loss": 0.6905, "step": 800 }, { "epoch": 9.18, "learning_rate": 1.837371956026433e-05, "loss": 0.6851, "step": 810 }, { "epoch": 9.29, "learning_rate": 1.8334476907153177e-05, "loss": 0.6759, "step": 820 }, { "epoch": 9.41, "learning_rate": 1.8294809369535265e-05, "loss": 0.6613, "step": 830 }, { "epoch": 9.52, "learning_rate": 1.825471896962774e-05, "loss": 0.6814, "step": 840 }, { "epoch": 9.63, "learning_rate": 1.8214207751204917e-05, "loss": 0.7044, "step": 850 }, { "epoch": 9.75, "learning_rate": 1.817327777949407e-05, "loss": 0.6753, "step": 860 }, { "epoch": 9.86, "learning_rate": 1.8131931141070166e-05, "loss": 0.6748, "step": 870 }, { "epoch": 9.97, "learning_rate": 1.8090169943749477e-05, "loss": 0.6853, "step": 880 }, { "epoch": 10.08, "learning_rate": 1.8047996316482134e-05, "loss": 0.6406, "step": 890 }, { "epoch": 10.2, "learning_rate": 1.8005412409243604e-05, "loss": 0.6391, "step": 900 }, { "epoch": 10.31, "learning_rate": 1.7962420392925066e-05, "loss": 0.6524, "step": 910 }, { "epoch": 10.42, "learning_rate": 1.7919022459222754e-05, "loss": 0.6618, "step": 920 }, { "epoch": 10.54, "learning_rate": 1.787522082052622e-05, "loss": 0.6604, "step": 930 }, { "epoch": 10.65, "learning_rate": 1.7831017709805555e-05, "loss": 0.6706, "step": 940 }, { "epoch": 10.76, "learning_rate": 1.778641538049755e-05, "loss": 0.6589, "step": 950 }, { "epoch": 10.88, "learning_rate": 1.7741416106390828e-05, "loss": 0.6631, "step": 960 }, { "epoch": 10.99, "learning_rate": 1.7696022181509892e-05, "loss": 0.6408, "step": 970 }, { "epoch": 11.1, "learning_rate": 1.7650235919998234e-05, "loss": 0.6077, "step": 980 }, { "epoch": 11.22, "learning_rate": 1.7604059656000313e-05, "loss": 0.6267, "step": 990 }, { "epoch": 11.33, "learning_rate": 1.7557495743542586e-05, "loss": 0.6324, "step": 1000 }, { "epoch": 11.44, "learning_rate": 1.75105465564135e-05, "loss": 0.645, "step": 1010 }, { "epoch": 11.56, "learning_rate": 1.7463214488042472e-05, "loss": 0.6167, "step": 1020 }, { "epoch": 11.67, "learning_rate": 1.741550195137788e-05, "loss": 0.6311, "step": 1030 }, { "epoch": 11.78, "learning_rate": 1.736741137876405e-05, "loss": 0.6329, "step": 1040 }, { "epoch": 11.9, "learning_rate": 1.7318945221817255e-05, "loss": 0.6469, "step": 1050 }, { "epoch": 12.01, "learning_rate": 1.727010595130074e-05, "loss": 0.6334, "step": 1060 }, { "epoch": 12.12, "learning_rate": 1.7220896056998753e-05, "loss": 0.5946, "step": 1070 }, { "epoch": 12.24, "learning_rate": 1.7171318047589637e-05, "loss": 0.5695, "step": 1080 }, { "epoch": 12.35, "learning_rate": 1.712137445051792e-05, "loss": 0.6101, "step": 1090 }, { "epoch": 12.46, "learning_rate": 1.7071067811865477e-05, "loss": 0.6203, "step": 1100 }, { "epoch": 12.58, "learning_rate": 1.7020400696221737e-05, "loss": 0.598, "step": 1110 }, { "epoch": 12.69, "learning_rate": 1.696937568655294e-05, "loss": 0.6177, "step": 1120 }, { "epoch": 12.8, "learning_rate": 1.691799538407044e-05, "loss": 0.6153, "step": 1130 }, { "epoch": 12.92, "learning_rate": 1.6866262408098134e-05, "loss": 0.6096, "step": 1140 }, { "epoch": 13.03, "learning_rate": 1.6814179395938915e-05, "loss": 0.612, "step": 1150 }, { "epoch": 13.14, "learning_rate": 1.6761749002740195e-05, "loss": 0.5858, "step": 1160 }, { "epoch": 13.26, "learning_rate": 1.6708973901358603e-05, "loss": 0.5715, "step": 1170 }, { "epoch": 13.37, "learning_rate": 1.6655856782223682e-05, "loss": 0.5834, "step": 1180 }, { "epoch": 13.48, "learning_rate": 1.660240035320075e-05, "loss": 0.5782, "step": 1190 }, { "epoch": 13.6, "learning_rate": 1.6548607339452853e-05, "loss": 0.5621, "step": 1200 }, { "epoch": 13.71, "learning_rate": 1.6494480483301836e-05, "loss": 0.5729, "step": 1210 }, { "epoch": 13.82, "learning_rate": 1.6440022544088553e-05, "loss": 0.5958, "step": 1220 }, { "epoch": 13.94, "learning_rate": 1.6385236298032183e-05, "loss": 0.6007, "step": 1230 }, { "epoch": 14.05, "learning_rate": 1.6330124538088705e-05, "loss": 0.5658, "step": 1240 }, { "epoch": 14.16, "learning_rate": 1.627469007380852e-05, "loss": 0.5674, "step": 1250 }, { "epoch": 14.28, "learning_rate": 1.6218935731193223e-05, "loss": 0.5649, "step": 1260 }, { "epoch": 14.39, "learning_rate": 1.616286435255153e-05, "loss": 0.5497, "step": 1270 }, { "epoch": 14.5, "learning_rate": 1.6106478796354382e-05, "loss": 0.5865, "step": 1280 }, { "epoch": 14.62, "learning_rate": 1.6049781937089227e-05, "loss": 0.547, "step": 1290 }, { "epoch": 14.73, "learning_rate": 1.599277666511347e-05, "loss": 0.5473, "step": 1300 }, { "epoch": 14.84, "learning_rate": 1.5935465886507143e-05, "loss": 0.5553, "step": 1310 }, { "epoch": 14.96, "learning_rate": 1.5877852522924733e-05, "loss": 0.5672, "step": 1320 }, { "epoch": 15.07, "learning_rate": 1.581993951144626e-05, "loss": 0.5392, "step": 1330 }, { "epoch": 15.18, "learning_rate": 1.576172980442753e-05, "loss": 0.504, "step": 1340 }, { "epoch": 15.3, "learning_rate": 1.5703226369349642e-05, "loss": 0.5446, "step": 1350 }, { "epoch": 15.41, "learning_rate": 1.5644432188667695e-05, "loss": 0.5323, "step": 1360 }, { "epoch": 15.52, "learning_rate": 1.5585350259658748e-05, "loss": 0.54, "step": 1370 }, { "epoch": 15.64, "learning_rate": 1.5525983594269026e-05, "loss": 0.5387, "step": 1380 }, { "epoch": 15.75, "learning_rate": 1.546633521896038e-05, "loss": 0.5349, "step": 1390 }, { "epoch": 15.86, "learning_rate": 1.5406408174555978e-05, "loss": 0.5303, "step": 1400 }, { "epoch": 15.98, "learning_rate": 1.5346205516085305e-05, "loss": 0.561, "step": 1410 }, { "epoch": 16.09, "learning_rate": 1.528573031262842e-05, "loss": 0.5295, "step": 1420 }, { "epoch": 16.2, "learning_rate": 1.5224985647159489e-05, "loss": 0.4995, "step": 1430 }, { "epoch": 16.32, "learning_rate": 1.5163974616389621e-05, "loss": 0.5151, "step": 1440 }, { "epoch": 16.43, "learning_rate": 1.5102700330609e-05, "loss": 0.5042, "step": 1450 }, { "epoch": 16.54, "learning_rate": 1.504116591352832e-05, "loss": 0.502, "step": 1460 }, { "epoch": 16.66, "learning_rate": 1.497937450211956e-05, "loss": 0.5101, "step": 1470 }, { "epoch": 16.77, "learning_rate": 1.491732924645604e-05, "loss": 0.5323, "step": 1480 }, { "epoch": 16.88, "learning_rate": 1.4855033309551842e-05, "loss": 0.509, "step": 1490 }, { "epoch": 17.0, "learning_rate": 1.479248986720057e-05, "loss": 0.5292, "step": 1500 }, { "epoch": 17.11, "learning_rate": 1.4729702107813438e-05, "loss": 0.4835, "step": 1510 }, { "epoch": 17.22, "learning_rate": 1.4666673232256738e-05, "loss": 0.4934, "step": 1520 }, { "epoch": 17.34, "learning_rate": 1.4603406453688656e-05, "loss": 0.4811, "step": 1530 }, { "epoch": 17.45, "learning_rate": 1.4539904997395468e-05, "loss": 0.4966, "step": 1540 }, { "epoch": 17.56, "learning_rate": 1.4476172100627127e-05, "loss": 0.4906, "step": 1550 }, { "epoch": 17.68, "learning_rate": 1.4412211012432213e-05, "loss": 0.4956, "step": 1560 }, { "epoch": 17.79, "learning_rate": 1.4348024993492323e-05, "loss": 0.5003, "step": 1570 }, { "epoch": 17.9, "learning_rate": 1.4283617315955815e-05, "loss": 0.5131, "step": 1580 }, { "epoch": 18.02, "learning_rate": 1.4218991263271024e-05, "loss": 0.4642, "step": 1590 }, { "epoch": 18.13, "learning_rate": 1.4154150130018867e-05, "loss": 0.4679, "step": 1600 }, { "epoch": 18.24, "learning_rate": 1.408909722174487e-05, "loss": 0.4555, "step": 1610 }, { "epoch": 18.36, "learning_rate": 1.4023835854790682e-05, "loss": 0.4663, "step": 1620 }, { "epoch": 18.47, "learning_rate": 1.3958369356124986e-05, "loss": 0.4991, "step": 1630 }, { "epoch": 18.58, "learning_rate": 1.3892701063173917e-05, "loss": 0.4592, "step": 1640 }, { "epoch": 18.7, "learning_rate": 1.3826834323650899e-05, "loss": 0.4697, "step": 1650 }, { "epoch": 18.81, "learning_rate": 1.3760772495385998e-05, "loss": 0.4812, "step": 1660 }, { "epoch": 18.92, "learning_rate": 1.369451894615474e-05, "loss": 0.4868, "step": 1670 }, { "epoch": 19.04, "learning_rate": 1.362807705350641e-05, "loss": 0.4511, "step": 1680 }, { "epoch": 19.15, "learning_rate": 1.3561450204591898e-05, "loss": 0.4332, "step": 1690 }, { "epoch": 19.26, "learning_rate": 1.3494641795990986e-05, "loss": 0.4392, "step": 1700 }, { "epoch": 19.38, "learning_rate": 1.3427655233539227e-05, "loss": 0.435, "step": 1710 }, { "epoch": 19.49, "learning_rate": 1.3360493932154301e-05, "loss": 0.4377, "step": 1720 }, { "epoch": 19.6, "learning_rate": 1.3293161315661934e-05, "loss": 0.4509, "step": 1730 }, { "epoch": 19.72, "learning_rate": 1.3225660816621342e-05, "loss": 0.4564, "step": 1740 }, { "epoch": 19.83, "learning_rate": 1.3157995876150252e-05, "loss": 0.471, "step": 1750 }, { "epoch": 19.94, "learning_rate": 1.3090169943749475e-05, "loss": 0.4759, "step": 1760 }, { "epoch": 20.06, "learning_rate": 1.302218647712706e-05, "loss": 0.454, "step": 1770 }, { "epoch": 20.17, "learning_rate": 1.2954048942022002e-05, "loss": 0.3999, "step": 1780 }, { "epoch": 20.28, "learning_rate": 1.288576081202759e-05, "loss": 0.415, "step": 1790 }, { "epoch": 20.4, "learning_rate": 1.2817325568414299e-05, "loss": 0.4349, "step": 1800 }, { "epoch": 20.51, "learning_rate": 1.2748746699952338e-05, "loss": 0.4281, "step": 1810 }, { "epoch": 20.62, "learning_rate": 1.2680027702733791e-05, "loss": 0.4391, "step": 1820 }, { "epoch": 20.74, "learning_rate": 1.2611172079994377e-05, "loss": 0.4288, "step": 1830 }, { "epoch": 20.85, "learning_rate": 1.2542183341934873e-05, "loss": 0.4562, "step": 1840 }, { "epoch": 20.96, "learning_rate": 1.2473065005542155e-05, "loss": 0.4294, "step": 1850 }, { "epoch": 21.08, "learning_rate": 1.2403820594409926e-05, "loss": 0.4147, "step": 1860 }, { "epoch": 21.19, "learning_rate": 1.2334453638559057e-05, "loss": 0.4003, "step": 1870 }, { "epoch": 21.3, "learning_rate": 1.2264967674257647e-05, "loss": 0.402, "step": 1880 }, { "epoch": 21.42, "learning_rate": 1.2195366243840745e-05, "loss": 0.3808, "step": 1890 }, { "epoch": 21.53, "learning_rate": 1.2125652895529766e-05, "loss": 0.4199, "step": 1900 }, { "epoch": 21.64, "learning_rate": 1.2055831183251608e-05, "loss": 0.4153, "step": 1910 }, { "epoch": 21.76, "learning_rate": 1.1985904666457455e-05, "loss": 0.4206, "step": 1920 }, { "epoch": 21.87, "learning_rate": 1.1915876909941356e-05, "loss": 0.4326, "step": 1930 }, { "epoch": 21.98, "learning_rate": 1.1845751483658454e-05, "loss": 0.4263, "step": 1940 }, { "epoch": 22.1, "learning_rate": 1.1775531962543036e-05, "loss": 0.3938, "step": 1950 }, { "epoch": 22.21, "learning_rate": 1.170522192632624e-05, "loss": 0.3748, "step": 1960 }, { "epoch": 22.32, "learning_rate": 1.1634824959353602e-05, "loss": 0.3757, "step": 1970 }, { "epoch": 22.44, "learning_rate": 1.156434465040231e-05, "loss": 0.3876, "step": 1980 }, { "epoch": 22.55, "learning_rate": 1.1493784592498252e-05, "loss": 0.395, "step": 1990 }, { "epoch": 22.66, "learning_rate": 1.1423148382732854e-05, "loss": 0.3938, "step": 2000 }, { "epoch": 22.78, "learning_rate": 1.1352439622079689e-05, "loss": 0.4146, "step": 2010 }, { "epoch": 22.89, "learning_rate": 1.1281661915210931e-05, "loss": 0.4206, "step": 2020 }, { "epoch": 23.0, "learning_rate": 1.1210818870313548e-05, "loss": 0.3975, "step": 2030 }, { "epoch": 23.12, "learning_rate": 1.1139914098905406e-05, "loss": 0.3613, "step": 2040 }, { "epoch": 23.23, "learning_rate": 1.1068951215651132e-05, "loss": 0.3572, "step": 2050 }, { "epoch": 23.34, "learning_rate": 1.0997933838177828e-05, "loss": 0.3773, "step": 2060 }, { "epoch": 23.46, "learning_rate": 1.0926865586890689e-05, "loss": 0.3846, "step": 2070 }, { "epoch": 23.57, "learning_rate": 1.08557500847884e-05, "loss": 0.3758, "step": 2080 }, { "epoch": 23.68, "learning_rate": 1.0784590957278452e-05, "loss": 0.393, "step": 2090 }, { "epoch": 23.8, "learning_rate": 1.0713391831992324e-05, "loss": 0.3699, "step": 2100 }, { "epoch": 23.91, "learning_rate": 1.064215633860055e-05, "loss": 0.3639, "step": 2110 }, { "epoch": 24.02, "learning_rate": 1.0570888108627682e-05, "loss": 0.3869, "step": 2120 }, { "epoch": 24.14, "learning_rate": 1.0499590775267142e-05, "loss": 0.3387, "step": 2130 }, { "epoch": 24.25, "learning_rate": 1.0428267973196027e-05, "loss": 0.3507, "step": 2140 }, { "epoch": 24.36, "learning_rate": 1.0356923338389807e-05, "loss": 0.3424, "step": 2150 }, { "epoch": 24.48, "learning_rate": 1.0285560507936962e-05, "loss": 0.3615, "step": 2160 }, { "epoch": 24.59, "learning_rate": 1.0214183119853583e-05, "loss": 0.3585, "step": 2170 }, { "epoch": 24.7, "learning_rate": 1.0142794812897874e-05, "loss": 0.3519, "step": 2180 }, { "epoch": 24.82, "learning_rate": 1.0071399226384695e-05, "loss": 0.3734, "step": 2190 }, { "epoch": 24.93, "learning_rate": 1e-05, "loss": 0.3788, "step": 2200 }, { "epoch": 25.04, "learning_rate": 9.928600773615306e-06, "loss": 0.3616, "step": 2210 }, { "epoch": 25.16, "learning_rate": 9.85720518710213e-06, "loss": 0.3345, "step": 2220 }, { "epoch": 25.27, "learning_rate": 9.78581688014642e-06, "loss": 0.3483, "step": 2230 }, { "epoch": 25.38, "learning_rate": 9.71443949206304e-06, "loss": 0.327, "step": 2240 }, { "epoch": 25.5, "learning_rate": 9.643076661610197e-06, "loss": 0.3422, "step": 2250 }, { "epoch": 25.61, "learning_rate": 9.571732026803978e-06, "loss": 0.3472, "step": 2260 }, { "epoch": 25.72, "learning_rate": 9.500409224732863e-06, "loss": 0.3475, "step": 2270 }, { "epoch": 25.84, "learning_rate": 9.42911189137232e-06, "loss": 0.3507, "step": 2280 }, { "epoch": 25.95, "learning_rate": 9.357843661399447e-06, "loss": 0.3534, "step": 2290 }, { "epoch": 26.06, "learning_rate": 9.286608168007678e-06, "loss": 0.3295, "step": 2300 }, { "epoch": 26.18, "learning_rate": 9.215409042721553e-06, "loss": 0.3187, "step": 2310 }, { "epoch": 26.29, "learning_rate": 9.144249915211605e-06, "loss": 0.3465, "step": 2320 }, { "epoch": 26.4, "learning_rate": 9.073134413109313e-06, "loss": 0.3262, "step": 2330 }, { "epoch": 26.52, "learning_rate": 9.002066161822174e-06, "loss": 0.3154, "step": 2340 }, { "epoch": 26.63, "learning_rate": 8.931048784348875e-06, "loss": 0.3277, "step": 2350 }, { "epoch": 26.74, "learning_rate": 8.860085901094595e-06, "loss": 0.3387, "step": 2360 }, { "epoch": 26.86, "learning_rate": 8.789181129686452e-06, "loss": 0.323, "step": 2370 }, { "epoch": 26.97, "learning_rate": 8.718338084789074e-06, "loss": 0.3243, "step": 2380 }, { "epoch": 27.08, "learning_rate": 8.647560377920311e-06, "loss": 0.3112, "step": 2390 }, { "epoch": 27.2, "learning_rate": 8.576851617267151e-06, "loss": 0.3138, "step": 2400 }, { "epoch": 27.31, "learning_rate": 8.50621540750175e-06, "loss": 0.294, "step": 2410 }, { "epoch": 27.42, "learning_rate": 8.43565534959769e-06, "loss": 0.3009, "step": 2420 }, { "epoch": 27.54, "learning_rate": 8.365175040646403e-06, "loss": 0.3217, "step": 2430 }, { "epoch": 27.65, "learning_rate": 8.294778073673762e-06, "loss": 0.3083, "step": 2440 }, { "epoch": 27.76, "learning_rate": 8.224468037456969e-06, "loss": 0.3201, "step": 2450 }, { "epoch": 27.88, "learning_rate": 8.154248516341547e-06, "loss": 0.3402, "step": 2460 }, { "epoch": 27.99, "learning_rate": 8.084123090058646e-06, "loss": 0.3128, "step": 2470 }, { "epoch": 28.1, "learning_rate": 8.014095333542548e-06, "loss": 0.2901, "step": 2480 }, { "epoch": 28.22, "learning_rate": 7.944168816748396e-06, "loss": 0.2901, "step": 2490 }, { "epoch": 28.33, "learning_rate": 7.874347104470234e-06, "loss": 0.2886, "step": 2500 }, { "epoch": 28.44, "learning_rate": 7.804633756159258e-06, "loss": 0.2953, "step": 2510 }, { "epoch": 28.56, "learning_rate": 7.735032325742355e-06, "loss": 0.3088, "step": 2520 }, { "epoch": 28.67, "learning_rate": 7.66554636144095e-06, "loss": 0.3004, "step": 2530 }, { "epoch": 28.78, "learning_rate": 7.596179405590076e-06, "loss": 0.3299, "step": 2540 }, { "epoch": 28.9, "learning_rate": 7.5269349944578454e-06, "loss": 0.294, "step": 2550 }, { "epoch": 29.01, "learning_rate": 7.4578166580651335e-06, "loss": 0.2974, "step": 2560 }, { "epoch": 29.12, "learning_rate": 7.388827920005628e-06, "loss": 0.2773, "step": 2570 }, { "epoch": 29.24, "learning_rate": 7.319972297266215e-06, "loss": 0.2718, "step": 2580 }, { "epoch": 29.35, "learning_rate": 7.2512533000476625e-06, "loss": 0.2964, "step": 2590 }, { "epoch": 29.46, "learning_rate": 7.182674431585703e-06, "loss": 0.2784, "step": 2600 }, { "epoch": 29.58, "learning_rate": 7.114239187972416e-06, "loss": 0.3001, "step": 2610 }, { "epoch": 29.69, "learning_rate": 7.045951057978001e-06, "loss": 0.2877, "step": 2620 }, { "epoch": 29.8, "learning_rate": 6.977813522872943e-06, "loss": 0.2964, "step": 2630 }, { "epoch": 29.92, "learning_rate": 6.909830056250527e-06, "loss": 0.2923, "step": 2640 }, { "epoch": 30.03, "learning_rate": 6.8420041238497525e-06, "loss": 0.2839, "step": 2650 }, { "epoch": 30.14, "learning_rate": 6.774339183378663e-06, "loss": 0.2679, "step": 2660 }, { "epoch": 30.25, "learning_rate": 6.7068386843380695e-06, "loss": 0.2751, "step": 2670 }, { "epoch": 30.37, "learning_rate": 6.639506067845698e-06, "loss": 0.2588, "step": 2680 }, { "epoch": 30.48, "learning_rate": 6.572344766460776e-06, "loss": 0.2828, "step": 2690 }, { "epoch": 30.59, "learning_rate": 6.505358204009018e-06, "loss": 0.2904, "step": 2700 }, { "epoch": 30.71, "learning_rate": 6.438549795408107e-06, "loss": 0.2712, "step": 2710 }, { "epoch": 30.82, "learning_rate": 6.3719229464935915e-06, "loss": 0.2765, "step": 2720 }, { "epoch": 30.93, "learning_rate": 6.305481053845262e-06, "loss": 0.2732, "step": 2730 }, { "epoch": 31.05, "learning_rate": 6.239227504614004e-06, "loss": 0.2604, "step": 2740 }, { "epoch": 31.16, "learning_rate": 6.173165676349103e-06, "loss": 0.2535, "step": 2750 }, { "epoch": 31.27, "learning_rate": 6.107298936826086e-06, "loss": 0.2607, "step": 2760 }, { "epoch": 31.39, "learning_rate": 6.041630643875018e-06, "loss": 0.2769, "step": 2770 }, { "epoch": 31.5, "learning_rate": 5.9761641452093225e-06, "loss": 0.2747, "step": 2780 }, { "epoch": 31.61, "learning_rate": 5.910902778255134e-06, "loss": 0.2578, "step": 2790 }, { "epoch": 31.73, "learning_rate": 5.845849869981137e-06, "loss": 0.2566, "step": 2800 }, { "epoch": 31.84, "learning_rate": 5.781008736728975e-06, "loss": 0.2658, "step": 2810 }, { "epoch": 31.95, "learning_rate": 5.716382684044191e-06, "loss": 0.2732, "step": 2820 }, { "epoch": 32.07, "learning_rate": 5.6519750065076815e-06, "loss": 0.2656, "step": 2830 }, { "epoch": 32.18, "learning_rate": 5.587788987567785e-06, "loss": 0.2386, "step": 2840 }, { "epoch": 32.29, "learning_rate": 5.523827899372876e-06, "loss": 0.2554, "step": 2850 }, { "epoch": 32.41, "learning_rate": 5.460095002604533e-06, "loss": 0.2511, "step": 2860 }, { "epoch": 32.52, "learning_rate": 5.396593546311346e-06, "loss": 0.2641, "step": 2870 }, { "epoch": 32.63, "learning_rate": 5.333326767743263e-06, "loss": 0.2553, "step": 2880 }, { "epoch": 32.75, "learning_rate": 5.276589982435913e-06, "loss": 0.2508, "step": 2890 }, { "epoch": 32.86, "learning_rate": 5.213777967188747e-06, "loss": 0.2533, "step": 2900 }, { "epoch": 32.97, "learning_rate": 5.151209949448599e-06, "loss": 0.2621, "step": 2910 }, { "epoch": 33.09, "learning_rate": 5.088889118879516e-06, "loss": 0.2381, "step": 2920 }, { "epoch": 33.2, "learning_rate": 5.02681865254417e-06, "loss": 0.2405, "step": 2930 }, { "epoch": 33.31, "learning_rate": 4.965001714741851e-06, "loss": 0.2473, "step": 2940 }, { "epoch": 33.43, "learning_rate": 4.903441456847198e-06, "loss": 0.2345, "step": 2950 }, { "epoch": 33.54, "learning_rate": 4.8421410171495265e-06, "loss": 0.2349, "step": 2960 }, { "epoch": 33.65, "learning_rate": 4.781103520692839e-06, "loss": 0.2466, "step": 2970 }, { "epoch": 33.77, "learning_rate": 4.720332079116523e-06, "loss": 0.2512, "step": 2980 }, { "epoch": 33.88, "learning_rate": 4.659829790496699e-06, "loss": 0.2633, "step": 2990 }, { "epoch": 33.99, "learning_rate": 4.599599739188322e-06, "loss": 0.247, "step": 3000 }, { "epoch": 34.11, "learning_rate": 4.539644995667911e-06, "loss": 0.2231, "step": 3010 }, { "epoch": 34.22, "learning_rate": 4.479968616377024e-06, "loss": 0.2174, "step": 3020 }, { "epoch": 34.33, "learning_rate": 4.420573643566455e-06, "loss": 0.231, "step": 3030 }, { "epoch": 34.45, "learning_rate": 4.361463105141137e-06, "loss": 0.2545, "step": 3040 }, { "epoch": 34.56, "learning_rate": 4.302640014505779e-06, "loss": 0.2502, "step": 3050 }, { "epoch": 34.67, "learning_rate": 4.244107370411248e-06, "loss": 0.2505, "step": 3060 }, { "epoch": 34.79, "learning_rate": 4.185868156801695e-06, "loss": 0.225, "step": 3070 }, { "epoch": 34.9, "learning_rate": 4.1279253426624345e-06, "loss": 0.2436, "step": 3080 }, { "epoch": 35.01, "learning_rate": 4.07028188186859e-06, "loss": 0.2274, "step": 3090 }, { "epoch": 35.13, "learning_rate": 4.0129407130345114e-06, "loss": 0.214, "step": 3100 }, { "epoch": 35.24, "learning_rate": 3.955904759363958e-06, "loss": 0.2088, "step": 3110 }, { "epoch": 35.35, "learning_rate": 3.899176928501078e-06, "loss": 0.244, "step": 3120 }, { "epoch": 35.47, "learning_rate": 3.842760112382183e-06, "loss": 0.2255, "step": 3130 }, { "epoch": 35.58, "learning_rate": 3.7866571870883382e-06, "loss": 0.2282, "step": 3140 }, { "epoch": 35.69, "learning_rate": 3.7308710126986934e-06, "loss": 0.2415, "step": 3150 }, { "epoch": 35.81, "learning_rate": 3.675404433144727e-06, "loss": 0.2415, "step": 3160 }, { "epoch": 35.92, "learning_rate": 3.6202602760652395e-06, "loss": 0.2301, "step": 3170 }, { "epoch": 36.03, "learning_rate": 3.565441352662211e-06, "loss": 0.2301, "step": 3180 }, { "epoch": 36.15, "learning_rate": 3.510950457557487e-06, "loss": 0.2213, "step": 3190 }, { "epoch": 36.26, "learning_rate": 3.4567903686503103e-06, "loss": 0.2174, "step": 3200 }, { "epoch": 36.37, "learning_rate": 3.4029638469757055e-06, "loss": 0.2236, "step": 3210 }, { "epoch": 36.49, "learning_rate": 3.3494736365637304e-06, "loss": 0.2235, "step": 3220 }, { "epoch": 36.6, "learning_rate": 3.2963224642995675e-06, "loss": 0.222, "step": 3230 }, { "epoch": 36.71, "learning_rate": 3.2435130397845472e-06, "loss": 0.2237, "step": 3240 }, { "epoch": 36.83, "learning_rate": 3.1910480551979706e-06, "loss": 0.2239, "step": 3250 }, { "epoch": 36.94, "learning_rate": 3.1389301851598976e-06, "loss": 0.2149, "step": 3260 }, { "epoch": 37.05, "learning_rate": 3.0871620865947816e-06, "loss": 0.2279, "step": 3270 }, { "epoch": 37.17, "learning_rate": 3.0357463985960257e-06, "loss": 0.2007, "step": 3280 }, { "epoch": 37.28, "learning_rate": 2.9846857422914434e-06, "loss": 0.2132, "step": 3290 }, { "epoch": 37.39, "learning_rate": 2.933982720709637e-06, "loss": 0.2147, "step": 3300 }, { "epoch": 37.51, "learning_rate": 2.883639918647296e-06, "loss": 0.2121, "step": 3310 }, { "epoch": 37.62, "learning_rate": 2.833659902537429e-06, "loss": 0.2147, "step": 3320 }, { "epoch": 37.73, "learning_rate": 2.7840452203185154e-06, "loss": 0.2127, "step": 3330 }, { "epoch": 37.85, "learning_rate": 2.7347984013046435e-06, "loss": 0.2308, "step": 3340 }, { "epoch": 37.96, "learning_rate": 2.6859219560565407e-06, "loss": 0.2152, "step": 3350 }, { "epoch": 38.07, "learning_rate": 2.637418376253591e-06, "loss": 0.2161, "step": 3360 }, { "epoch": 38.19, "learning_rate": 2.589290134566824e-06, "loss": 0.2206, "step": 3370 }, { "epoch": 38.3, "learning_rate": 2.541539684532852e-06, "loss": 0.2256, "step": 3380 }, { "epoch": 38.41, "learning_rate": 2.4941694604287913e-06, "loss": 0.2096, "step": 3390 }, { "epoch": 38.53, "learning_rate": 2.447181877148165e-06, "loss": 0.2058, "step": 3400 }, { "epoch": 38.64, "learning_rate": 2.4005793300777933e-06, "loss": 0.2102, "step": 3410 }, { "epoch": 38.75, "learning_rate": 2.3543641949756835e-06, "loss": 0.2139, "step": 3420 }, { "epoch": 38.87, "learning_rate": 2.30853882784991e-06, "loss": 0.2048, "step": 3430 }, { "epoch": 38.98, "learning_rate": 2.2631055648385e-06, "loss": 0.2071, "step": 3440 }, { "epoch": 39.09, "learning_rate": 2.2180667220903697e-06, "loss": 0.206, "step": 3450 }, { "epoch": 39.21, "learning_rate": 2.1734245956472024e-06, "loss": 0.2049, "step": 3460 }, { "epoch": 39.32, "learning_rate": 2.1291814613264383e-06, "loss": 0.2096, "step": 3470 }, { "epoch": 39.43, "learning_rate": 2.085339574605243e-06, "loss": 0.1986, "step": 3480 }, { "epoch": 39.55, "learning_rate": 2.0419011705055115e-06, "loss": 0.2036, "step": 3490 }, { "epoch": 39.66, "learning_rate": 1.998868463479945e-06, "loss": 0.21, "step": 3500 }, { "epoch": 39.77, "learning_rate": 1.956243647299155e-06, "loss": 0.2155, "step": 3510 }, { "epoch": 39.89, "learning_rate": 1.914028894939829e-06, "loss": 0.2086, "step": 3520 }, { "epoch": 40.0, "learning_rate": 1.8722263584739486e-06, "loss": 0.1997, "step": 3530 }, { "epoch": 40.11, "learning_rate": 1.830838168959075e-06, "loss": 0.1932, "step": 3540 }, { "epoch": 40.23, "learning_rate": 1.7898664363297302e-06, "loss": 0.1997, "step": 3550 }, { "epoch": 40.34, "learning_rate": 1.7493132492898134e-06, "loss": 0.1975, "step": 3560 }, { "epoch": 40.45, "learning_rate": 1.7091806752061212e-06, "loss": 0.2072, "step": 3570 }, { "epoch": 40.57, "learning_rate": 1.6694707600029702e-06, "loss": 0.1786, "step": 3580 }, { "epoch": 40.68, "learning_rate": 1.630185528057886e-06, "loss": 0.2044, "step": 3590 }, { "epoch": 40.79, "learning_rate": 1.5913269820984023e-06, "loss": 0.2129, "step": 3600 }, { "epoch": 40.91, "learning_rate": 1.5528971030999706e-06, "loss": 0.2219, "step": 3610 }, { "epoch": 41.02, "learning_rate": 1.5148978501849642e-06, "loss": 0.2023, "step": 3620 }, { "epoch": 41.13, "learning_rate": 1.4773311605228059e-06, "loss": 0.1975, "step": 3630 }, { "epoch": 41.25, "learning_rate": 1.4401989492312164e-06, "loss": 0.1922, "step": 3640 }, { "epoch": 41.36, "learning_rate": 1.403503109278579e-06, "loss": 0.1868, "step": 3650 }, { "epoch": 41.47, "learning_rate": 1.3672455113874429e-06, "loss": 0.2009, "step": 3660 }, { "epoch": 41.59, "learning_rate": 1.3314280039391426e-06, "loss": 0.2064, "step": 3670 }, { "epoch": 41.7, "learning_rate": 1.2960524128795837e-06, "loss": 0.2036, "step": 3680 }, { "epoch": 41.81, "learning_rate": 1.2611205416261595e-06, "loss": 0.1896, "step": 3690 }, { "epoch": 41.93, "learning_rate": 1.2266341709757946e-06, "loss": 0.2078, "step": 3700 }, { "epoch": 42.04, "learning_rate": 1.192595059014179e-06, "loss": 0.1931, "step": 3710 }, { "epoch": 42.15, "learning_rate": 1.1590049410261384e-06, "loss": 0.203, "step": 3720 }, { "epoch": 42.27, "learning_rate": 1.1258655294071686e-06, "loss": 0.1928, "step": 3730 }, { "epoch": 42.38, "learning_rate": 1.0931785135761375e-06, "loss": 0.1898, "step": 3740 }, { "epoch": 42.49, "learning_rate": 1.0609455598891682e-06, "loss": 0.2001, "step": 3750 }, { "epoch": 42.61, "learning_rate": 1.029168311554678e-06, "loss": 0.2011, "step": 3760 }, { "epoch": 42.72, "learning_rate": 9.978483885496204e-07, "loss": 0.1871, "step": 3770 }, { "epoch": 42.83, "learning_rate": 9.669873875368852e-07, "loss": 0.2027, "step": 3780 }, { "epoch": 42.95, "learning_rate": 9.365868817839264e-07, "loss": 0.1941, "step": 3790 }, { "epoch": 43.06, "learning_rate": 9.066484210825288e-07, "loss": 0.1917, "step": 3800 }, { "epoch": 43.17, "learning_rate": 8.771735316698249e-07, "loss": 0.192, "step": 3810 }, { "epoch": 43.29, "learning_rate": 8.481637161504741e-07, "loss": 0.1959, "step": 3820 }, { "epoch": 43.4, "learning_rate": 8.19620453420068e-07, "loss": 0.192, "step": 3830 }, { "epoch": 43.51, "learning_rate": 7.915451985897382e-07, "loss": 0.2027, "step": 3840 }, { "epoch": 43.63, "learning_rate": 7.639393829119701e-07, "loss": 0.19, "step": 3850 }, { "epoch": 43.74, "learning_rate": 7.368044137076435e-07, "loss": 0.1919, "step": 3860 }, { "epoch": 43.85, "learning_rate": 7.101416742942913e-07, "loss": 0.1897, "step": 3870 }, { "epoch": 43.97, "learning_rate": 6.839525239155675e-07, "loss": 0.199, "step": 3880 }, { "epoch": 44.08, "learning_rate": 6.582382976719703e-07, "loss": 0.2, "step": 3890 }, { "epoch": 44.19, "learning_rate": 6.330003064527679e-07, "loss": 0.1917, "step": 3900 }, { "epoch": 44.31, "learning_rate": 6.082398368691711e-07, "loss": 0.1804, "step": 3910 }, { "epoch": 44.42, "learning_rate": 5.839581511887515e-07, "loss": 0.1971, "step": 3920 }, { "epoch": 44.53, "learning_rate": 5.601564872710851e-07, "loss": 0.1895, "step": 3930 }, { "epoch": 44.65, "learning_rate": 5.36836058504645e-07, "loss": 0.2008, "step": 3940 }, { "epoch": 44.76, "learning_rate": 5.13998053744954e-07, "loss": 0.1871, "step": 3950 }, { "epoch": 44.87, "learning_rate": 4.916436372539668e-07, "loss": 0.1899, "step": 3960 }, { "epoch": 44.99, "learning_rate": 4.697739486407227e-07, "loss": 0.1917, "step": 3970 }, { "epoch": 45.1, "learning_rate": 4.4839010280325003e-07, "loss": 0.1976, "step": 3980 }, { "epoch": 45.21, "learning_rate": 4.2749318987172385e-07, "loss": 0.1915, "step": 3990 }, { "epoch": 45.33, "learning_rate": 4.070842751529025e-07, "loss": 0.2004, "step": 4000 }, { "epoch": 45.44, "learning_rate": 3.871643990758056e-07, "loss": 0.1947, "step": 4010 }, { "epoch": 45.55, "learning_rate": 3.6773457713868423e-07, "loss": 0.1793, "step": 4020 }, { "epoch": 45.67, "learning_rate": 3.487957998572511e-07, "loss": 0.1882, "step": 4030 }, { "epoch": 45.78, "learning_rate": 3.3034903271417564e-07, "loss": 0.1862, "step": 4040 }, { "epoch": 45.89, "learning_rate": 3.1239521610987757e-07, "loss": 0.1862, "step": 4050 }, { "epoch": 46.01, "learning_rate": 2.949352653145754e-07, "loss": 0.1987, "step": 4060 }, { "epoch": 46.12, "learning_rate": 2.779700704216337e-07, "loss": 0.1837, "step": 4070 }, { "epoch": 46.23, "learning_rate": 2.6150049630218277e-07, "loss": 0.1949, "step": 4080 }, { "epoch": 46.35, "learning_rate": 2.4552738256102717e-07, "loss": 0.1892, "step": 4090 }, { "epoch": 46.46, "learning_rate": 2.3005154349385106e-07, "loss": 0.1959, "step": 4100 }, { "epoch": 46.57, "learning_rate": 2.1507376804569935e-07, "loss": 0.1925, "step": 4110 }, { "epoch": 46.69, "learning_rate": 2.0059481977075523e-07, "loss": 0.1933, "step": 4120 }, { "epoch": 46.8, "learning_rate": 1.8661543679342365e-07, "loss": 0.2049, "step": 4130 }, { "epoch": 46.91, "learning_rate": 1.731363317706969e-07, "loss": 0.1756, "step": 4140 }, { "epoch": 47.03, "learning_rate": 1.601581918558237e-07, "loss": 0.1746, "step": 4150 }, { "epoch": 47.14, "learning_rate": 1.4768167866328176e-07, "loss": 0.1963, "step": 4160 }, { "epoch": 47.25, "learning_rate": 1.357074282350457e-07, "loss": 0.1903, "step": 4170 }, { "epoch": 47.37, "learning_rate": 1.2423605100816304e-07, "loss": 0.1821, "step": 4180 }, { "epoch": 47.48, "learning_rate": 1.1326813178363927e-07, "loss": 0.1946, "step": 4190 }, { "epoch": 47.59, "learning_rate": 1.0280422969661696e-07, "loss": 0.1832, "step": 4200 }, { "epoch": 47.71, "learning_rate": 9.284487818787879e-08, "loss": 0.1892, "step": 4210 }, { "epoch": 47.82, "learning_rate": 8.33905849766481e-08, "loss": 0.1883, "step": 4220 }, { "epoch": 47.93, "learning_rate": 7.444183203471067e-08, "loss": 0.1921, "step": 4230 }, { "epoch": 48.05, "learning_rate": 6.599907556184115e-08, "loss": 0.2003, "step": 4240 }, { "epoch": 48.16, "learning_rate": 5.806274596254491e-08, "loss": 0.1789, "step": 4250 }, { "epoch": 48.27, "learning_rate": 5.0633247824118936e-08, "loss": 0.1853, "step": 4260 }, { "epoch": 48.39, "learning_rate": 4.371095989602614e-08, "loss": 0.1968, "step": 4270 }, { "epoch": 48.5, "learning_rate": 3.729623507058744e-08, "loss": 0.2019, "step": 4280 }, { "epoch": 48.61, "learning_rate": 3.13894003649895e-08, "loss": 0.1808, "step": 4290 }, { "epoch": 48.73, "learning_rate": 2.5990756904614723e-08, "loss": 0.1959, "step": 4300 }, { "epoch": 48.84, "learning_rate": 2.1100579907691322e-08, "loss": 0.1781, "step": 4310 }, { "epoch": 48.95, "learning_rate": 1.6719118671262302e-08, "loss": 0.1868, "step": 4320 }, { "epoch": 49.07, "learning_rate": 1.2846596558473424e-08, "loss": 0.1887, "step": 4330 }, { "epoch": 49.18, "learning_rate": 9.483210987193404e-09, "loss": 0.1982, "step": 4340 }, { "epoch": 49.29, "learning_rate": 6.62913341994087e-09, "loss": 0.1957, "step": 4350 }, { "epoch": 49.41, "learning_rate": 4.284509355151345e-09, "loss": 0.1925, "step": 4360 }, { "epoch": 49.52, "learning_rate": 2.449458319754294e-09, "loss": 0.1859, "step": 4370 }, { "epoch": 49.63, "learning_rate": 1.124073863082442e-09, "loss": 0.187, "step": 4380 }, { "epoch": 49.75, "learning_rate": 3.0842355210336515e-10, "loss": 0.1792, "step": 4390 }, { "epoch": 49.86, "learning_rate": 2.548967970028571e-12, "loss": 0.1895, "step": 4400 } ], "max_steps": 4400, "num_train_epochs": 50, "total_flos": 6.496431653421466e+17, "trial_name": null, "trial_params": null }