LLama-2-MedText-Delta / trainer_state.json
aldrinc's picture
Upload 9 files
bb592ee
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 49.858356940509914,
"global_step": 4400,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.11,
"learning_rate": 1.9999745104274995e-05,
"loss": 1.1665,
"step": 10
},
{
"epoch": 0.23,
"learning_rate": 1.9998980430094333e-05,
"loss": 1.1142,
"step": 20
},
{
"epoch": 0.34,
"learning_rate": 1.9997706016440462e-05,
"loss": 1.0657,
"step": 30
},
{
"epoch": 0.45,
"learning_rate": 1.9995921928281893e-05,
"loss": 1.0229,
"step": 40
},
{
"epoch": 0.57,
"learning_rate": 1.999362825656992e-05,
"loss": 1.0155,
"step": 50
},
{
"epoch": 0.68,
"learning_rate": 1.9990825118233958e-05,
"loss": 0.9919,
"step": 60
},
{
"epoch": 0.79,
"learning_rate": 1.9987512656175612e-05,
"loss": 0.9623,
"step": 70
},
{
"epoch": 0.91,
"learning_rate": 1.9983691039261358e-05,
"loss": 0.937,
"step": 80
},
{
"epoch": 1.02,
"learning_rate": 1.9979360462313965e-05,
"loss": 0.9527,
"step": 90
},
{
"epoch": 1.13,
"learning_rate": 1.9974521146102535e-05,
"loss": 0.9173,
"step": 100
},
{
"epoch": 1.25,
"learning_rate": 1.9969173337331283e-05,
"loss": 0.91,
"step": 110
},
{
"epoch": 1.36,
"learning_rate": 1.9963317308626916e-05,
"loss": 0.9045,
"step": 120
},
{
"epoch": 1.47,
"learning_rate": 1.9956953358524774e-05,
"loss": 0.94,
"step": 130
},
{
"epoch": 1.59,
"learning_rate": 1.9950081811453598e-05,
"loss": 0.9086,
"step": 140
},
{
"epoch": 1.7,
"learning_rate": 1.9942703017718977e-05,
"loss": 0.8946,
"step": 150
},
{
"epoch": 1.81,
"learning_rate": 1.99348173534855e-05,
"loss": 0.8836,
"step": 160
},
{
"epoch": 1.93,
"learning_rate": 1.9926425220757607e-05,
"loss": 0.9001,
"step": 170
},
{
"epoch": 2.04,
"learning_rate": 1.991752704735903e-05,
"loss": 0.8826,
"step": 180
},
{
"epoch": 2.15,
"learning_rate": 1.990812328691105e-05,
"loss": 0.8549,
"step": 190
},
{
"epoch": 2.27,
"learning_rate": 1.989821441880933e-05,
"loss": 0.869,
"step": 200
},
{
"epoch": 2.38,
"learning_rate": 1.9887800948199496e-05,
"loss": 0.8772,
"step": 210
},
{
"epoch": 2.49,
"learning_rate": 1.9876883405951378e-05,
"loss": 0.8691,
"step": 220
},
{
"epoch": 2.61,
"learning_rate": 1.9865462348631945e-05,
"loss": 0.8978,
"step": 230
},
{
"epoch": 2.72,
"learning_rate": 1.9853538358476933e-05,
"loss": 0.8576,
"step": 240
},
{
"epoch": 2.83,
"learning_rate": 1.984111204336116e-05,
"loss": 0.8506,
"step": 250
},
{
"epoch": 2.95,
"learning_rate": 1.9828184036767556e-05,
"loss": 0.8679,
"step": 260
},
{
"epoch": 3.06,
"learning_rate": 1.981475499775484e-05,
"loss": 0.888,
"step": 270
},
{
"epoch": 3.17,
"learning_rate": 1.9800825610923937e-05,
"loss": 0.8369,
"step": 280
},
{
"epoch": 3.29,
"learning_rate": 1.9786396586383078e-05,
"loss": 0.8244,
"step": 290
},
{
"epoch": 3.4,
"learning_rate": 1.9771468659711595e-05,
"loss": 0.8495,
"step": 300
},
{
"epoch": 3.51,
"learning_rate": 1.9756042591922436e-05,
"loss": 0.85,
"step": 310
},
{
"epoch": 3.63,
"learning_rate": 1.9740119169423337e-05,
"loss": 0.8307,
"step": 320
},
{
"epoch": 3.74,
"learning_rate": 1.9723699203976768e-05,
"loss": 0.8419,
"step": 330
},
{
"epoch": 3.85,
"learning_rate": 1.9706783532658528e-05,
"loss": 0.8483,
"step": 340
},
{
"epoch": 3.97,
"learning_rate": 1.9689373017815076e-05,
"loss": 0.8181,
"step": 350
},
{
"epoch": 4.08,
"learning_rate": 1.9671468547019575e-05,
"loss": 0.8203,
"step": 360
},
{
"epoch": 4.19,
"learning_rate": 1.9653071033026635e-05,
"loss": 0.8132,
"step": 370
},
{
"epoch": 4.31,
"learning_rate": 1.963418141372579e-05,
"loss": 0.8299,
"step": 380
},
{
"epoch": 4.42,
"learning_rate": 1.9614800652093685e-05,
"loss": 0.8228,
"step": 390
},
{
"epoch": 4.53,
"learning_rate": 1.9594929736144978e-05,
"loss": 0.8083,
"step": 400
},
{
"epoch": 4.65,
"learning_rate": 1.9574569678881965e-05,
"loss": 0.8054,
"step": 410
},
{
"epoch": 4.76,
"learning_rate": 1.955372151824297e-05,
"loss": 0.8127,
"step": 420
},
{
"epoch": 4.87,
"learning_rate": 1.9532386317049387e-05,
"loss": 0.8047,
"step": 430
},
{
"epoch": 4.99,
"learning_rate": 1.9510565162951538e-05,
"loss": 0.8113,
"step": 440
},
{
"epoch": 5.1,
"learning_rate": 1.9488259168373198e-05,
"loss": 0.7806,
"step": 450
},
{
"epoch": 5.21,
"learning_rate": 1.94654694704549e-05,
"loss": 0.7973,
"step": 460
},
{
"epoch": 5.33,
"learning_rate": 1.944219723099596e-05,
"loss": 0.7968,
"step": 470
},
{
"epoch": 5.44,
"learning_rate": 1.941844363639525e-05,
"loss": 0.7838,
"step": 480
},
{
"epoch": 5.55,
"learning_rate": 1.9394209897590707e-05,
"loss": 0.7798,
"step": 490
},
{
"epoch": 5.67,
"learning_rate": 1.936949724999762e-05,
"loss": 0.7791,
"step": 500
},
{
"epoch": 5.78,
"learning_rate": 1.9344306953445632e-05,
"loss": 0.7632,
"step": 510
},
{
"epoch": 5.89,
"learning_rate": 1.9318640292114526e-05,
"loss": 0.7984,
"step": 520
},
{
"epoch": 6.01,
"learning_rate": 1.929249857446875e-05,
"loss": 0.7763,
"step": 530
},
{
"epoch": 6.12,
"learning_rate": 1.9265883133190715e-05,
"loss": 0.7489,
"step": 540
},
{
"epoch": 6.23,
"learning_rate": 1.9238795325112867e-05,
"loss": 0.7739,
"step": 550
},
{
"epoch": 6.35,
"learning_rate": 1.92112365311485e-05,
"loss": 0.7664,
"step": 560
},
{
"epoch": 6.46,
"learning_rate": 1.918320815622137e-05,
"loss": 0.76,
"step": 570
},
{
"epoch": 6.57,
"learning_rate": 1.9154711629194062e-05,
"loss": 0.7604,
"step": 580
},
{
"epoch": 6.69,
"learning_rate": 1.912574840279516e-05,
"loss": 0.7598,
"step": 590
},
{
"epoch": 6.8,
"learning_rate": 1.9096319953545186e-05,
"loss": 0.7585,
"step": 600
},
{
"epoch": 6.91,
"learning_rate": 1.9066427781681314e-05,
"loss": 0.7312,
"step": 610
},
{
"epoch": 7.03,
"learning_rate": 1.9036073411080917e-05,
"loss": 0.732,
"step": 620
},
{
"epoch": 7.14,
"learning_rate": 1.900525838918385e-05,
"loss": 0.7392,
"step": 630
},
{
"epoch": 7.25,
"learning_rate": 1.8973984286913584e-05,
"loss": 0.6986,
"step": 640
},
{
"epoch": 7.37,
"learning_rate": 1.8942252698597113e-05,
"loss": 0.7281,
"step": 650
},
{
"epoch": 7.48,
"learning_rate": 1.891006524188368e-05,
"loss": 0.7316,
"step": 660
},
{
"epoch": 7.59,
"learning_rate": 1.8877423557662307e-05,
"loss": 0.7329,
"step": 670
},
{
"epoch": 7.71,
"learning_rate": 1.8844329309978146e-05,
"loss": 0.7409,
"step": 680
},
{
"epoch": 7.82,
"learning_rate": 1.8810784185947648e-05,
"loss": 0.7345,
"step": 690
},
{
"epoch": 7.93,
"learning_rate": 1.8776789895672557e-05,
"loss": 0.7418,
"step": 700
},
{
"epoch": 8.05,
"learning_rate": 1.8742348172152728e-05,
"loss": 0.7426,
"step": 710
},
{
"epoch": 8.16,
"learning_rate": 1.8707460771197773e-05,
"loss": 0.6952,
"step": 720
},
{
"epoch": 8.27,
"learning_rate": 1.8672129471337568e-05,
"loss": 0.7136,
"step": 730
},
{
"epoch": 8.39,
"learning_rate": 1.863635607373157e-05,
"loss": 0.7284,
"step": 740
},
{
"epoch": 8.5,
"learning_rate": 1.8600142402077006e-05,
"loss": 0.6978,
"step": 750
},
{
"epoch": 8.61,
"learning_rate": 1.856349030251589e-05,
"loss": 0.6843,
"step": 760
},
{
"epoch": 8.73,
"learning_rate": 1.8526401643540924e-05,
"loss": 0.6895,
"step": 770
},
{
"epoch": 8.84,
"learning_rate": 1.8488878315900228e-05,
"loss": 0.7084,
"step": 780
},
{
"epoch": 8.95,
"learning_rate": 1.8450922232500966e-05,
"loss": 0.7028,
"step": 790
},
{
"epoch": 9.07,
"learning_rate": 1.8412535328311813e-05,
"loss": 0.6905,
"step": 800
},
{
"epoch": 9.18,
"learning_rate": 1.837371956026433e-05,
"loss": 0.6851,
"step": 810
},
{
"epoch": 9.29,
"learning_rate": 1.8334476907153177e-05,
"loss": 0.6759,
"step": 820
},
{
"epoch": 9.41,
"learning_rate": 1.8294809369535265e-05,
"loss": 0.6613,
"step": 830
},
{
"epoch": 9.52,
"learning_rate": 1.825471896962774e-05,
"loss": 0.6814,
"step": 840
},
{
"epoch": 9.63,
"learning_rate": 1.8214207751204917e-05,
"loss": 0.7044,
"step": 850
},
{
"epoch": 9.75,
"learning_rate": 1.817327777949407e-05,
"loss": 0.6753,
"step": 860
},
{
"epoch": 9.86,
"learning_rate": 1.8131931141070166e-05,
"loss": 0.6748,
"step": 870
},
{
"epoch": 9.97,
"learning_rate": 1.8090169943749477e-05,
"loss": 0.6853,
"step": 880
},
{
"epoch": 10.08,
"learning_rate": 1.8047996316482134e-05,
"loss": 0.6406,
"step": 890
},
{
"epoch": 10.2,
"learning_rate": 1.8005412409243604e-05,
"loss": 0.6391,
"step": 900
},
{
"epoch": 10.31,
"learning_rate": 1.7962420392925066e-05,
"loss": 0.6524,
"step": 910
},
{
"epoch": 10.42,
"learning_rate": 1.7919022459222754e-05,
"loss": 0.6618,
"step": 920
},
{
"epoch": 10.54,
"learning_rate": 1.787522082052622e-05,
"loss": 0.6604,
"step": 930
},
{
"epoch": 10.65,
"learning_rate": 1.7831017709805555e-05,
"loss": 0.6706,
"step": 940
},
{
"epoch": 10.76,
"learning_rate": 1.778641538049755e-05,
"loss": 0.6589,
"step": 950
},
{
"epoch": 10.88,
"learning_rate": 1.7741416106390828e-05,
"loss": 0.6631,
"step": 960
},
{
"epoch": 10.99,
"learning_rate": 1.7696022181509892e-05,
"loss": 0.6408,
"step": 970
},
{
"epoch": 11.1,
"learning_rate": 1.7650235919998234e-05,
"loss": 0.6077,
"step": 980
},
{
"epoch": 11.22,
"learning_rate": 1.7604059656000313e-05,
"loss": 0.6267,
"step": 990
},
{
"epoch": 11.33,
"learning_rate": 1.7557495743542586e-05,
"loss": 0.6324,
"step": 1000
},
{
"epoch": 11.44,
"learning_rate": 1.75105465564135e-05,
"loss": 0.645,
"step": 1010
},
{
"epoch": 11.56,
"learning_rate": 1.7463214488042472e-05,
"loss": 0.6167,
"step": 1020
},
{
"epoch": 11.67,
"learning_rate": 1.741550195137788e-05,
"loss": 0.6311,
"step": 1030
},
{
"epoch": 11.78,
"learning_rate": 1.736741137876405e-05,
"loss": 0.6329,
"step": 1040
},
{
"epoch": 11.9,
"learning_rate": 1.7318945221817255e-05,
"loss": 0.6469,
"step": 1050
},
{
"epoch": 12.01,
"learning_rate": 1.727010595130074e-05,
"loss": 0.6334,
"step": 1060
},
{
"epoch": 12.12,
"learning_rate": 1.7220896056998753e-05,
"loss": 0.5946,
"step": 1070
},
{
"epoch": 12.24,
"learning_rate": 1.7171318047589637e-05,
"loss": 0.5695,
"step": 1080
},
{
"epoch": 12.35,
"learning_rate": 1.712137445051792e-05,
"loss": 0.6101,
"step": 1090
},
{
"epoch": 12.46,
"learning_rate": 1.7071067811865477e-05,
"loss": 0.6203,
"step": 1100
},
{
"epoch": 12.58,
"learning_rate": 1.7020400696221737e-05,
"loss": 0.598,
"step": 1110
},
{
"epoch": 12.69,
"learning_rate": 1.696937568655294e-05,
"loss": 0.6177,
"step": 1120
},
{
"epoch": 12.8,
"learning_rate": 1.691799538407044e-05,
"loss": 0.6153,
"step": 1130
},
{
"epoch": 12.92,
"learning_rate": 1.6866262408098134e-05,
"loss": 0.6096,
"step": 1140
},
{
"epoch": 13.03,
"learning_rate": 1.6814179395938915e-05,
"loss": 0.612,
"step": 1150
},
{
"epoch": 13.14,
"learning_rate": 1.6761749002740195e-05,
"loss": 0.5858,
"step": 1160
},
{
"epoch": 13.26,
"learning_rate": 1.6708973901358603e-05,
"loss": 0.5715,
"step": 1170
},
{
"epoch": 13.37,
"learning_rate": 1.6655856782223682e-05,
"loss": 0.5834,
"step": 1180
},
{
"epoch": 13.48,
"learning_rate": 1.660240035320075e-05,
"loss": 0.5782,
"step": 1190
},
{
"epoch": 13.6,
"learning_rate": 1.6548607339452853e-05,
"loss": 0.5621,
"step": 1200
},
{
"epoch": 13.71,
"learning_rate": 1.6494480483301836e-05,
"loss": 0.5729,
"step": 1210
},
{
"epoch": 13.82,
"learning_rate": 1.6440022544088553e-05,
"loss": 0.5958,
"step": 1220
},
{
"epoch": 13.94,
"learning_rate": 1.6385236298032183e-05,
"loss": 0.6007,
"step": 1230
},
{
"epoch": 14.05,
"learning_rate": 1.6330124538088705e-05,
"loss": 0.5658,
"step": 1240
},
{
"epoch": 14.16,
"learning_rate": 1.627469007380852e-05,
"loss": 0.5674,
"step": 1250
},
{
"epoch": 14.28,
"learning_rate": 1.6218935731193223e-05,
"loss": 0.5649,
"step": 1260
},
{
"epoch": 14.39,
"learning_rate": 1.616286435255153e-05,
"loss": 0.5497,
"step": 1270
},
{
"epoch": 14.5,
"learning_rate": 1.6106478796354382e-05,
"loss": 0.5865,
"step": 1280
},
{
"epoch": 14.62,
"learning_rate": 1.6049781937089227e-05,
"loss": 0.547,
"step": 1290
},
{
"epoch": 14.73,
"learning_rate": 1.599277666511347e-05,
"loss": 0.5473,
"step": 1300
},
{
"epoch": 14.84,
"learning_rate": 1.5935465886507143e-05,
"loss": 0.5553,
"step": 1310
},
{
"epoch": 14.96,
"learning_rate": 1.5877852522924733e-05,
"loss": 0.5672,
"step": 1320
},
{
"epoch": 15.07,
"learning_rate": 1.581993951144626e-05,
"loss": 0.5392,
"step": 1330
},
{
"epoch": 15.18,
"learning_rate": 1.576172980442753e-05,
"loss": 0.504,
"step": 1340
},
{
"epoch": 15.3,
"learning_rate": 1.5703226369349642e-05,
"loss": 0.5446,
"step": 1350
},
{
"epoch": 15.41,
"learning_rate": 1.5644432188667695e-05,
"loss": 0.5323,
"step": 1360
},
{
"epoch": 15.52,
"learning_rate": 1.5585350259658748e-05,
"loss": 0.54,
"step": 1370
},
{
"epoch": 15.64,
"learning_rate": 1.5525983594269026e-05,
"loss": 0.5387,
"step": 1380
},
{
"epoch": 15.75,
"learning_rate": 1.546633521896038e-05,
"loss": 0.5349,
"step": 1390
},
{
"epoch": 15.86,
"learning_rate": 1.5406408174555978e-05,
"loss": 0.5303,
"step": 1400
},
{
"epoch": 15.98,
"learning_rate": 1.5346205516085305e-05,
"loss": 0.561,
"step": 1410
},
{
"epoch": 16.09,
"learning_rate": 1.528573031262842e-05,
"loss": 0.5295,
"step": 1420
},
{
"epoch": 16.2,
"learning_rate": 1.5224985647159489e-05,
"loss": 0.4995,
"step": 1430
},
{
"epoch": 16.32,
"learning_rate": 1.5163974616389621e-05,
"loss": 0.5151,
"step": 1440
},
{
"epoch": 16.43,
"learning_rate": 1.5102700330609e-05,
"loss": 0.5042,
"step": 1450
},
{
"epoch": 16.54,
"learning_rate": 1.504116591352832e-05,
"loss": 0.502,
"step": 1460
},
{
"epoch": 16.66,
"learning_rate": 1.497937450211956e-05,
"loss": 0.5101,
"step": 1470
},
{
"epoch": 16.77,
"learning_rate": 1.491732924645604e-05,
"loss": 0.5323,
"step": 1480
},
{
"epoch": 16.88,
"learning_rate": 1.4855033309551842e-05,
"loss": 0.509,
"step": 1490
},
{
"epoch": 17.0,
"learning_rate": 1.479248986720057e-05,
"loss": 0.5292,
"step": 1500
},
{
"epoch": 17.11,
"learning_rate": 1.4729702107813438e-05,
"loss": 0.4835,
"step": 1510
},
{
"epoch": 17.22,
"learning_rate": 1.4666673232256738e-05,
"loss": 0.4934,
"step": 1520
},
{
"epoch": 17.34,
"learning_rate": 1.4603406453688656e-05,
"loss": 0.4811,
"step": 1530
},
{
"epoch": 17.45,
"learning_rate": 1.4539904997395468e-05,
"loss": 0.4966,
"step": 1540
},
{
"epoch": 17.56,
"learning_rate": 1.4476172100627127e-05,
"loss": 0.4906,
"step": 1550
},
{
"epoch": 17.68,
"learning_rate": 1.4412211012432213e-05,
"loss": 0.4956,
"step": 1560
},
{
"epoch": 17.79,
"learning_rate": 1.4348024993492323e-05,
"loss": 0.5003,
"step": 1570
},
{
"epoch": 17.9,
"learning_rate": 1.4283617315955815e-05,
"loss": 0.5131,
"step": 1580
},
{
"epoch": 18.02,
"learning_rate": 1.4218991263271024e-05,
"loss": 0.4642,
"step": 1590
},
{
"epoch": 18.13,
"learning_rate": 1.4154150130018867e-05,
"loss": 0.4679,
"step": 1600
},
{
"epoch": 18.24,
"learning_rate": 1.408909722174487e-05,
"loss": 0.4555,
"step": 1610
},
{
"epoch": 18.36,
"learning_rate": 1.4023835854790682e-05,
"loss": 0.4663,
"step": 1620
},
{
"epoch": 18.47,
"learning_rate": 1.3958369356124986e-05,
"loss": 0.4991,
"step": 1630
},
{
"epoch": 18.58,
"learning_rate": 1.3892701063173917e-05,
"loss": 0.4592,
"step": 1640
},
{
"epoch": 18.7,
"learning_rate": 1.3826834323650899e-05,
"loss": 0.4697,
"step": 1650
},
{
"epoch": 18.81,
"learning_rate": 1.3760772495385998e-05,
"loss": 0.4812,
"step": 1660
},
{
"epoch": 18.92,
"learning_rate": 1.369451894615474e-05,
"loss": 0.4868,
"step": 1670
},
{
"epoch": 19.04,
"learning_rate": 1.362807705350641e-05,
"loss": 0.4511,
"step": 1680
},
{
"epoch": 19.15,
"learning_rate": 1.3561450204591898e-05,
"loss": 0.4332,
"step": 1690
},
{
"epoch": 19.26,
"learning_rate": 1.3494641795990986e-05,
"loss": 0.4392,
"step": 1700
},
{
"epoch": 19.38,
"learning_rate": 1.3427655233539227e-05,
"loss": 0.435,
"step": 1710
},
{
"epoch": 19.49,
"learning_rate": 1.3360493932154301e-05,
"loss": 0.4377,
"step": 1720
},
{
"epoch": 19.6,
"learning_rate": 1.3293161315661934e-05,
"loss": 0.4509,
"step": 1730
},
{
"epoch": 19.72,
"learning_rate": 1.3225660816621342e-05,
"loss": 0.4564,
"step": 1740
},
{
"epoch": 19.83,
"learning_rate": 1.3157995876150252e-05,
"loss": 0.471,
"step": 1750
},
{
"epoch": 19.94,
"learning_rate": 1.3090169943749475e-05,
"loss": 0.4759,
"step": 1760
},
{
"epoch": 20.06,
"learning_rate": 1.302218647712706e-05,
"loss": 0.454,
"step": 1770
},
{
"epoch": 20.17,
"learning_rate": 1.2954048942022002e-05,
"loss": 0.3999,
"step": 1780
},
{
"epoch": 20.28,
"learning_rate": 1.288576081202759e-05,
"loss": 0.415,
"step": 1790
},
{
"epoch": 20.4,
"learning_rate": 1.2817325568414299e-05,
"loss": 0.4349,
"step": 1800
},
{
"epoch": 20.51,
"learning_rate": 1.2748746699952338e-05,
"loss": 0.4281,
"step": 1810
},
{
"epoch": 20.62,
"learning_rate": 1.2680027702733791e-05,
"loss": 0.4391,
"step": 1820
},
{
"epoch": 20.74,
"learning_rate": 1.2611172079994377e-05,
"loss": 0.4288,
"step": 1830
},
{
"epoch": 20.85,
"learning_rate": 1.2542183341934873e-05,
"loss": 0.4562,
"step": 1840
},
{
"epoch": 20.96,
"learning_rate": 1.2473065005542155e-05,
"loss": 0.4294,
"step": 1850
},
{
"epoch": 21.08,
"learning_rate": 1.2403820594409926e-05,
"loss": 0.4147,
"step": 1860
},
{
"epoch": 21.19,
"learning_rate": 1.2334453638559057e-05,
"loss": 0.4003,
"step": 1870
},
{
"epoch": 21.3,
"learning_rate": 1.2264967674257647e-05,
"loss": 0.402,
"step": 1880
},
{
"epoch": 21.42,
"learning_rate": 1.2195366243840745e-05,
"loss": 0.3808,
"step": 1890
},
{
"epoch": 21.53,
"learning_rate": 1.2125652895529766e-05,
"loss": 0.4199,
"step": 1900
},
{
"epoch": 21.64,
"learning_rate": 1.2055831183251608e-05,
"loss": 0.4153,
"step": 1910
},
{
"epoch": 21.76,
"learning_rate": 1.1985904666457455e-05,
"loss": 0.4206,
"step": 1920
},
{
"epoch": 21.87,
"learning_rate": 1.1915876909941356e-05,
"loss": 0.4326,
"step": 1930
},
{
"epoch": 21.98,
"learning_rate": 1.1845751483658454e-05,
"loss": 0.4263,
"step": 1940
},
{
"epoch": 22.1,
"learning_rate": 1.1775531962543036e-05,
"loss": 0.3938,
"step": 1950
},
{
"epoch": 22.21,
"learning_rate": 1.170522192632624e-05,
"loss": 0.3748,
"step": 1960
},
{
"epoch": 22.32,
"learning_rate": 1.1634824959353602e-05,
"loss": 0.3757,
"step": 1970
},
{
"epoch": 22.44,
"learning_rate": 1.156434465040231e-05,
"loss": 0.3876,
"step": 1980
},
{
"epoch": 22.55,
"learning_rate": 1.1493784592498252e-05,
"loss": 0.395,
"step": 1990
},
{
"epoch": 22.66,
"learning_rate": 1.1423148382732854e-05,
"loss": 0.3938,
"step": 2000
},
{
"epoch": 22.78,
"learning_rate": 1.1352439622079689e-05,
"loss": 0.4146,
"step": 2010
},
{
"epoch": 22.89,
"learning_rate": 1.1281661915210931e-05,
"loss": 0.4206,
"step": 2020
},
{
"epoch": 23.0,
"learning_rate": 1.1210818870313548e-05,
"loss": 0.3975,
"step": 2030
},
{
"epoch": 23.12,
"learning_rate": 1.1139914098905406e-05,
"loss": 0.3613,
"step": 2040
},
{
"epoch": 23.23,
"learning_rate": 1.1068951215651132e-05,
"loss": 0.3572,
"step": 2050
},
{
"epoch": 23.34,
"learning_rate": 1.0997933838177828e-05,
"loss": 0.3773,
"step": 2060
},
{
"epoch": 23.46,
"learning_rate": 1.0926865586890689e-05,
"loss": 0.3846,
"step": 2070
},
{
"epoch": 23.57,
"learning_rate": 1.08557500847884e-05,
"loss": 0.3758,
"step": 2080
},
{
"epoch": 23.68,
"learning_rate": 1.0784590957278452e-05,
"loss": 0.393,
"step": 2090
},
{
"epoch": 23.8,
"learning_rate": 1.0713391831992324e-05,
"loss": 0.3699,
"step": 2100
},
{
"epoch": 23.91,
"learning_rate": 1.064215633860055e-05,
"loss": 0.3639,
"step": 2110
},
{
"epoch": 24.02,
"learning_rate": 1.0570888108627682e-05,
"loss": 0.3869,
"step": 2120
},
{
"epoch": 24.14,
"learning_rate": 1.0499590775267142e-05,
"loss": 0.3387,
"step": 2130
},
{
"epoch": 24.25,
"learning_rate": 1.0428267973196027e-05,
"loss": 0.3507,
"step": 2140
},
{
"epoch": 24.36,
"learning_rate": 1.0356923338389807e-05,
"loss": 0.3424,
"step": 2150
},
{
"epoch": 24.48,
"learning_rate": 1.0285560507936962e-05,
"loss": 0.3615,
"step": 2160
},
{
"epoch": 24.59,
"learning_rate": 1.0214183119853583e-05,
"loss": 0.3585,
"step": 2170
},
{
"epoch": 24.7,
"learning_rate": 1.0142794812897874e-05,
"loss": 0.3519,
"step": 2180
},
{
"epoch": 24.82,
"learning_rate": 1.0071399226384695e-05,
"loss": 0.3734,
"step": 2190
},
{
"epoch": 24.93,
"learning_rate": 1e-05,
"loss": 0.3788,
"step": 2200
},
{
"epoch": 25.04,
"learning_rate": 9.928600773615306e-06,
"loss": 0.3616,
"step": 2210
},
{
"epoch": 25.16,
"learning_rate": 9.85720518710213e-06,
"loss": 0.3345,
"step": 2220
},
{
"epoch": 25.27,
"learning_rate": 9.78581688014642e-06,
"loss": 0.3483,
"step": 2230
},
{
"epoch": 25.38,
"learning_rate": 9.71443949206304e-06,
"loss": 0.327,
"step": 2240
},
{
"epoch": 25.5,
"learning_rate": 9.643076661610197e-06,
"loss": 0.3422,
"step": 2250
},
{
"epoch": 25.61,
"learning_rate": 9.571732026803978e-06,
"loss": 0.3472,
"step": 2260
},
{
"epoch": 25.72,
"learning_rate": 9.500409224732863e-06,
"loss": 0.3475,
"step": 2270
},
{
"epoch": 25.84,
"learning_rate": 9.42911189137232e-06,
"loss": 0.3507,
"step": 2280
},
{
"epoch": 25.95,
"learning_rate": 9.357843661399447e-06,
"loss": 0.3534,
"step": 2290
},
{
"epoch": 26.06,
"learning_rate": 9.286608168007678e-06,
"loss": 0.3295,
"step": 2300
},
{
"epoch": 26.18,
"learning_rate": 9.215409042721553e-06,
"loss": 0.3187,
"step": 2310
},
{
"epoch": 26.29,
"learning_rate": 9.144249915211605e-06,
"loss": 0.3465,
"step": 2320
},
{
"epoch": 26.4,
"learning_rate": 9.073134413109313e-06,
"loss": 0.3262,
"step": 2330
},
{
"epoch": 26.52,
"learning_rate": 9.002066161822174e-06,
"loss": 0.3154,
"step": 2340
},
{
"epoch": 26.63,
"learning_rate": 8.931048784348875e-06,
"loss": 0.3277,
"step": 2350
},
{
"epoch": 26.74,
"learning_rate": 8.860085901094595e-06,
"loss": 0.3387,
"step": 2360
},
{
"epoch": 26.86,
"learning_rate": 8.789181129686452e-06,
"loss": 0.323,
"step": 2370
},
{
"epoch": 26.97,
"learning_rate": 8.718338084789074e-06,
"loss": 0.3243,
"step": 2380
},
{
"epoch": 27.08,
"learning_rate": 8.647560377920311e-06,
"loss": 0.3112,
"step": 2390
},
{
"epoch": 27.2,
"learning_rate": 8.576851617267151e-06,
"loss": 0.3138,
"step": 2400
},
{
"epoch": 27.31,
"learning_rate": 8.50621540750175e-06,
"loss": 0.294,
"step": 2410
},
{
"epoch": 27.42,
"learning_rate": 8.43565534959769e-06,
"loss": 0.3009,
"step": 2420
},
{
"epoch": 27.54,
"learning_rate": 8.365175040646403e-06,
"loss": 0.3217,
"step": 2430
},
{
"epoch": 27.65,
"learning_rate": 8.294778073673762e-06,
"loss": 0.3083,
"step": 2440
},
{
"epoch": 27.76,
"learning_rate": 8.224468037456969e-06,
"loss": 0.3201,
"step": 2450
},
{
"epoch": 27.88,
"learning_rate": 8.154248516341547e-06,
"loss": 0.3402,
"step": 2460
},
{
"epoch": 27.99,
"learning_rate": 8.084123090058646e-06,
"loss": 0.3128,
"step": 2470
},
{
"epoch": 28.1,
"learning_rate": 8.014095333542548e-06,
"loss": 0.2901,
"step": 2480
},
{
"epoch": 28.22,
"learning_rate": 7.944168816748396e-06,
"loss": 0.2901,
"step": 2490
},
{
"epoch": 28.33,
"learning_rate": 7.874347104470234e-06,
"loss": 0.2886,
"step": 2500
},
{
"epoch": 28.44,
"learning_rate": 7.804633756159258e-06,
"loss": 0.2953,
"step": 2510
},
{
"epoch": 28.56,
"learning_rate": 7.735032325742355e-06,
"loss": 0.3088,
"step": 2520
},
{
"epoch": 28.67,
"learning_rate": 7.66554636144095e-06,
"loss": 0.3004,
"step": 2530
},
{
"epoch": 28.78,
"learning_rate": 7.596179405590076e-06,
"loss": 0.3299,
"step": 2540
},
{
"epoch": 28.9,
"learning_rate": 7.5269349944578454e-06,
"loss": 0.294,
"step": 2550
},
{
"epoch": 29.01,
"learning_rate": 7.4578166580651335e-06,
"loss": 0.2974,
"step": 2560
},
{
"epoch": 29.12,
"learning_rate": 7.388827920005628e-06,
"loss": 0.2773,
"step": 2570
},
{
"epoch": 29.24,
"learning_rate": 7.319972297266215e-06,
"loss": 0.2718,
"step": 2580
},
{
"epoch": 29.35,
"learning_rate": 7.2512533000476625e-06,
"loss": 0.2964,
"step": 2590
},
{
"epoch": 29.46,
"learning_rate": 7.182674431585703e-06,
"loss": 0.2784,
"step": 2600
},
{
"epoch": 29.58,
"learning_rate": 7.114239187972416e-06,
"loss": 0.3001,
"step": 2610
},
{
"epoch": 29.69,
"learning_rate": 7.045951057978001e-06,
"loss": 0.2877,
"step": 2620
},
{
"epoch": 29.8,
"learning_rate": 6.977813522872943e-06,
"loss": 0.2964,
"step": 2630
},
{
"epoch": 29.92,
"learning_rate": 6.909830056250527e-06,
"loss": 0.2923,
"step": 2640
},
{
"epoch": 30.03,
"learning_rate": 6.8420041238497525e-06,
"loss": 0.2839,
"step": 2650
},
{
"epoch": 30.14,
"learning_rate": 6.774339183378663e-06,
"loss": 0.2679,
"step": 2660
},
{
"epoch": 30.25,
"learning_rate": 6.7068386843380695e-06,
"loss": 0.2751,
"step": 2670
},
{
"epoch": 30.37,
"learning_rate": 6.639506067845698e-06,
"loss": 0.2588,
"step": 2680
},
{
"epoch": 30.48,
"learning_rate": 6.572344766460776e-06,
"loss": 0.2828,
"step": 2690
},
{
"epoch": 30.59,
"learning_rate": 6.505358204009018e-06,
"loss": 0.2904,
"step": 2700
},
{
"epoch": 30.71,
"learning_rate": 6.438549795408107e-06,
"loss": 0.2712,
"step": 2710
},
{
"epoch": 30.82,
"learning_rate": 6.3719229464935915e-06,
"loss": 0.2765,
"step": 2720
},
{
"epoch": 30.93,
"learning_rate": 6.305481053845262e-06,
"loss": 0.2732,
"step": 2730
},
{
"epoch": 31.05,
"learning_rate": 6.239227504614004e-06,
"loss": 0.2604,
"step": 2740
},
{
"epoch": 31.16,
"learning_rate": 6.173165676349103e-06,
"loss": 0.2535,
"step": 2750
},
{
"epoch": 31.27,
"learning_rate": 6.107298936826086e-06,
"loss": 0.2607,
"step": 2760
},
{
"epoch": 31.39,
"learning_rate": 6.041630643875018e-06,
"loss": 0.2769,
"step": 2770
},
{
"epoch": 31.5,
"learning_rate": 5.9761641452093225e-06,
"loss": 0.2747,
"step": 2780
},
{
"epoch": 31.61,
"learning_rate": 5.910902778255134e-06,
"loss": 0.2578,
"step": 2790
},
{
"epoch": 31.73,
"learning_rate": 5.845849869981137e-06,
"loss": 0.2566,
"step": 2800
},
{
"epoch": 31.84,
"learning_rate": 5.781008736728975e-06,
"loss": 0.2658,
"step": 2810
},
{
"epoch": 31.95,
"learning_rate": 5.716382684044191e-06,
"loss": 0.2732,
"step": 2820
},
{
"epoch": 32.07,
"learning_rate": 5.6519750065076815e-06,
"loss": 0.2656,
"step": 2830
},
{
"epoch": 32.18,
"learning_rate": 5.587788987567785e-06,
"loss": 0.2386,
"step": 2840
},
{
"epoch": 32.29,
"learning_rate": 5.523827899372876e-06,
"loss": 0.2554,
"step": 2850
},
{
"epoch": 32.41,
"learning_rate": 5.460095002604533e-06,
"loss": 0.2511,
"step": 2860
},
{
"epoch": 32.52,
"learning_rate": 5.396593546311346e-06,
"loss": 0.2641,
"step": 2870
},
{
"epoch": 32.63,
"learning_rate": 5.333326767743263e-06,
"loss": 0.2553,
"step": 2880
},
{
"epoch": 32.75,
"learning_rate": 5.276589982435913e-06,
"loss": 0.2508,
"step": 2890
},
{
"epoch": 32.86,
"learning_rate": 5.213777967188747e-06,
"loss": 0.2533,
"step": 2900
},
{
"epoch": 32.97,
"learning_rate": 5.151209949448599e-06,
"loss": 0.2621,
"step": 2910
},
{
"epoch": 33.09,
"learning_rate": 5.088889118879516e-06,
"loss": 0.2381,
"step": 2920
},
{
"epoch": 33.2,
"learning_rate": 5.02681865254417e-06,
"loss": 0.2405,
"step": 2930
},
{
"epoch": 33.31,
"learning_rate": 4.965001714741851e-06,
"loss": 0.2473,
"step": 2940
},
{
"epoch": 33.43,
"learning_rate": 4.903441456847198e-06,
"loss": 0.2345,
"step": 2950
},
{
"epoch": 33.54,
"learning_rate": 4.8421410171495265e-06,
"loss": 0.2349,
"step": 2960
},
{
"epoch": 33.65,
"learning_rate": 4.781103520692839e-06,
"loss": 0.2466,
"step": 2970
},
{
"epoch": 33.77,
"learning_rate": 4.720332079116523e-06,
"loss": 0.2512,
"step": 2980
},
{
"epoch": 33.88,
"learning_rate": 4.659829790496699e-06,
"loss": 0.2633,
"step": 2990
},
{
"epoch": 33.99,
"learning_rate": 4.599599739188322e-06,
"loss": 0.247,
"step": 3000
},
{
"epoch": 34.11,
"learning_rate": 4.539644995667911e-06,
"loss": 0.2231,
"step": 3010
},
{
"epoch": 34.22,
"learning_rate": 4.479968616377024e-06,
"loss": 0.2174,
"step": 3020
},
{
"epoch": 34.33,
"learning_rate": 4.420573643566455e-06,
"loss": 0.231,
"step": 3030
},
{
"epoch": 34.45,
"learning_rate": 4.361463105141137e-06,
"loss": 0.2545,
"step": 3040
},
{
"epoch": 34.56,
"learning_rate": 4.302640014505779e-06,
"loss": 0.2502,
"step": 3050
},
{
"epoch": 34.67,
"learning_rate": 4.244107370411248e-06,
"loss": 0.2505,
"step": 3060
},
{
"epoch": 34.79,
"learning_rate": 4.185868156801695e-06,
"loss": 0.225,
"step": 3070
},
{
"epoch": 34.9,
"learning_rate": 4.1279253426624345e-06,
"loss": 0.2436,
"step": 3080
},
{
"epoch": 35.01,
"learning_rate": 4.07028188186859e-06,
"loss": 0.2274,
"step": 3090
},
{
"epoch": 35.13,
"learning_rate": 4.0129407130345114e-06,
"loss": 0.214,
"step": 3100
},
{
"epoch": 35.24,
"learning_rate": 3.955904759363958e-06,
"loss": 0.2088,
"step": 3110
},
{
"epoch": 35.35,
"learning_rate": 3.899176928501078e-06,
"loss": 0.244,
"step": 3120
},
{
"epoch": 35.47,
"learning_rate": 3.842760112382183e-06,
"loss": 0.2255,
"step": 3130
},
{
"epoch": 35.58,
"learning_rate": 3.7866571870883382e-06,
"loss": 0.2282,
"step": 3140
},
{
"epoch": 35.69,
"learning_rate": 3.7308710126986934e-06,
"loss": 0.2415,
"step": 3150
},
{
"epoch": 35.81,
"learning_rate": 3.675404433144727e-06,
"loss": 0.2415,
"step": 3160
},
{
"epoch": 35.92,
"learning_rate": 3.6202602760652395e-06,
"loss": 0.2301,
"step": 3170
},
{
"epoch": 36.03,
"learning_rate": 3.565441352662211e-06,
"loss": 0.2301,
"step": 3180
},
{
"epoch": 36.15,
"learning_rate": 3.510950457557487e-06,
"loss": 0.2213,
"step": 3190
},
{
"epoch": 36.26,
"learning_rate": 3.4567903686503103e-06,
"loss": 0.2174,
"step": 3200
},
{
"epoch": 36.37,
"learning_rate": 3.4029638469757055e-06,
"loss": 0.2236,
"step": 3210
},
{
"epoch": 36.49,
"learning_rate": 3.3494736365637304e-06,
"loss": 0.2235,
"step": 3220
},
{
"epoch": 36.6,
"learning_rate": 3.2963224642995675e-06,
"loss": 0.222,
"step": 3230
},
{
"epoch": 36.71,
"learning_rate": 3.2435130397845472e-06,
"loss": 0.2237,
"step": 3240
},
{
"epoch": 36.83,
"learning_rate": 3.1910480551979706e-06,
"loss": 0.2239,
"step": 3250
},
{
"epoch": 36.94,
"learning_rate": 3.1389301851598976e-06,
"loss": 0.2149,
"step": 3260
},
{
"epoch": 37.05,
"learning_rate": 3.0871620865947816e-06,
"loss": 0.2279,
"step": 3270
},
{
"epoch": 37.17,
"learning_rate": 3.0357463985960257e-06,
"loss": 0.2007,
"step": 3280
},
{
"epoch": 37.28,
"learning_rate": 2.9846857422914434e-06,
"loss": 0.2132,
"step": 3290
},
{
"epoch": 37.39,
"learning_rate": 2.933982720709637e-06,
"loss": 0.2147,
"step": 3300
},
{
"epoch": 37.51,
"learning_rate": 2.883639918647296e-06,
"loss": 0.2121,
"step": 3310
},
{
"epoch": 37.62,
"learning_rate": 2.833659902537429e-06,
"loss": 0.2147,
"step": 3320
},
{
"epoch": 37.73,
"learning_rate": 2.7840452203185154e-06,
"loss": 0.2127,
"step": 3330
},
{
"epoch": 37.85,
"learning_rate": 2.7347984013046435e-06,
"loss": 0.2308,
"step": 3340
},
{
"epoch": 37.96,
"learning_rate": 2.6859219560565407e-06,
"loss": 0.2152,
"step": 3350
},
{
"epoch": 38.07,
"learning_rate": 2.637418376253591e-06,
"loss": 0.2161,
"step": 3360
},
{
"epoch": 38.19,
"learning_rate": 2.589290134566824e-06,
"loss": 0.2206,
"step": 3370
},
{
"epoch": 38.3,
"learning_rate": 2.541539684532852e-06,
"loss": 0.2256,
"step": 3380
},
{
"epoch": 38.41,
"learning_rate": 2.4941694604287913e-06,
"loss": 0.2096,
"step": 3390
},
{
"epoch": 38.53,
"learning_rate": 2.447181877148165e-06,
"loss": 0.2058,
"step": 3400
},
{
"epoch": 38.64,
"learning_rate": 2.4005793300777933e-06,
"loss": 0.2102,
"step": 3410
},
{
"epoch": 38.75,
"learning_rate": 2.3543641949756835e-06,
"loss": 0.2139,
"step": 3420
},
{
"epoch": 38.87,
"learning_rate": 2.30853882784991e-06,
"loss": 0.2048,
"step": 3430
},
{
"epoch": 38.98,
"learning_rate": 2.2631055648385e-06,
"loss": 0.2071,
"step": 3440
},
{
"epoch": 39.09,
"learning_rate": 2.2180667220903697e-06,
"loss": 0.206,
"step": 3450
},
{
"epoch": 39.21,
"learning_rate": 2.1734245956472024e-06,
"loss": 0.2049,
"step": 3460
},
{
"epoch": 39.32,
"learning_rate": 2.1291814613264383e-06,
"loss": 0.2096,
"step": 3470
},
{
"epoch": 39.43,
"learning_rate": 2.085339574605243e-06,
"loss": 0.1986,
"step": 3480
},
{
"epoch": 39.55,
"learning_rate": 2.0419011705055115e-06,
"loss": 0.2036,
"step": 3490
},
{
"epoch": 39.66,
"learning_rate": 1.998868463479945e-06,
"loss": 0.21,
"step": 3500
},
{
"epoch": 39.77,
"learning_rate": 1.956243647299155e-06,
"loss": 0.2155,
"step": 3510
},
{
"epoch": 39.89,
"learning_rate": 1.914028894939829e-06,
"loss": 0.2086,
"step": 3520
},
{
"epoch": 40.0,
"learning_rate": 1.8722263584739486e-06,
"loss": 0.1997,
"step": 3530
},
{
"epoch": 40.11,
"learning_rate": 1.830838168959075e-06,
"loss": 0.1932,
"step": 3540
},
{
"epoch": 40.23,
"learning_rate": 1.7898664363297302e-06,
"loss": 0.1997,
"step": 3550
},
{
"epoch": 40.34,
"learning_rate": 1.7493132492898134e-06,
"loss": 0.1975,
"step": 3560
},
{
"epoch": 40.45,
"learning_rate": 1.7091806752061212e-06,
"loss": 0.2072,
"step": 3570
},
{
"epoch": 40.57,
"learning_rate": 1.6694707600029702e-06,
"loss": 0.1786,
"step": 3580
},
{
"epoch": 40.68,
"learning_rate": 1.630185528057886e-06,
"loss": 0.2044,
"step": 3590
},
{
"epoch": 40.79,
"learning_rate": 1.5913269820984023e-06,
"loss": 0.2129,
"step": 3600
},
{
"epoch": 40.91,
"learning_rate": 1.5528971030999706e-06,
"loss": 0.2219,
"step": 3610
},
{
"epoch": 41.02,
"learning_rate": 1.5148978501849642e-06,
"loss": 0.2023,
"step": 3620
},
{
"epoch": 41.13,
"learning_rate": 1.4773311605228059e-06,
"loss": 0.1975,
"step": 3630
},
{
"epoch": 41.25,
"learning_rate": 1.4401989492312164e-06,
"loss": 0.1922,
"step": 3640
},
{
"epoch": 41.36,
"learning_rate": 1.403503109278579e-06,
"loss": 0.1868,
"step": 3650
},
{
"epoch": 41.47,
"learning_rate": 1.3672455113874429e-06,
"loss": 0.2009,
"step": 3660
},
{
"epoch": 41.59,
"learning_rate": 1.3314280039391426e-06,
"loss": 0.2064,
"step": 3670
},
{
"epoch": 41.7,
"learning_rate": 1.2960524128795837e-06,
"loss": 0.2036,
"step": 3680
},
{
"epoch": 41.81,
"learning_rate": 1.2611205416261595e-06,
"loss": 0.1896,
"step": 3690
},
{
"epoch": 41.93,
"learning_rate": 1.2266341709757946e-06,
"loss": 0.2078,
"step": 3700
},
{
"epoch": 42.04,
"learning_rate": 1.192595059014179e-06,
"loss": 0.1931,
"step": 3710
},
{
"epoch": 42.15,
"learning_rate": 1.1590049410261384e-06,
"loss": 0.203,
"step": 3720
},
{
"epoch": 42.27,
"learning_rate": 1.1258655294071686e-06,
"loss": 0.1928,
"step": 3730
},
{
"epoch": 42.38,
"learning_rate": 1.0931785135761375e-06,
"loss": 0.1898,
"step": 3740
},
{
"epoch": 42.49,
"learning_rate": 1.0609455598891682e-06,
"loss": 0.2001,
"step": 3750
},
{
"epoch": 42.61,
"learning_rate": 1.029168311554678e-06,
"loss": 0.2011,
"step": 3760
},
{
"epoch": 42.72,
"learning_rate": 9.978483885496204e-07,
"loss": 0.1871,
"step": 3770
},
{
"epoch": 42.83,
"learning_rate": 9.669873875368852e-07,
"loss": 0.2027,
"step": 3780
},
{
"epoch": 42.95,
"learning_rate": 9.365868817839264e-07,
"loss": 0.1941,
"step": 3790
},
{
"epoch": 43.06,
"learning_rate": 9.066484210825288e-07,
"loss": 0.1917,
"step": 3800
},
{
"epoch": 43.17,
"learning_rate": 8.771735316698249e-07,
"loss": 0.192,
"step": 3810
},
{
"epoch": 43.29,
"learning_rate": 8.481637161504741e-07,
"loss": 0.1959,
"step": 3820
},
{
"epoch": 43.4,
"learning_rate": 8.19620453420068e-07,
"loss": 0.192,
"step": 3830
},
{
"epoch": 43.51,
"learning_rate": 7.915451985897382e-07,
"loss": 0.2027,
"step": 3840
},
{
"epoch": 43.63,
"learning_rate": 7.639393829119701e-07,
"loss": 0.19,
"step": 3850
},
{
"epoch": 43.74,
"learning_rate": 7.368044137076435e-07,
"loss": 0.1919,
"step": 3860
},
{
"epoch": 43.85,
"learning_rate": 7.101416742942913e-07,
"loss": 0.1897,
"step": 3870
},
{
"epoch": 43.97,
"learning_rate": 6.839525239155675e-07,
"loss": 0.199,
"step": 3880
},
{
"epoch": 44.08,
"learning_rate": 6.582382976719703e-07,
"loss": 0.2,
"step": 3890
},
{
"epoch": 44.19,
"learning_rate": 6.330003064527679e-07,
"loss": 0.1917,
"step": 3900
},
{
"epoch": 44.31,
"learning_rate": 6.082398368691711e-07,
"loss": 0.1804,
"step": 3910
},
{
"epoch": 44.42,
"learning_rate": 5.839581511887515e-07,
"loss": 0.1971,
"step": 3920
},
{
"epoch": 44.53,
"learning_rate": 5.601564872710851e-07,
"loss": 0.1895,
"step": 3930
},
{
"epoch": 44.65,
"learning_rate": 5.36836058504645e-07,
"loss": 0.2008,
"step": 3940
},
{
"epoch": 44.76,
"learning_rate": 5.13998053744954e-07,
"loss": 0.1871,
"step": 3950
},
{
"epoch": 44.87,
"learning_rate": 4.916436372539668e-07,
"loss": 0.1899,
"step": 3960
},
{
"epoch": 44.99,
"learning_rate": 4.697739486407227e-07,
"loss": 0.1917,
"step": 3970
},
{
"epoch": 45.1,
"learning_rate": 4.4839010280325003e-07,
"loss": 0.1976,
"step": 3980
},
{
"epoch": 45.21,
"learning_rate": 4.2749318987172385e-07,
"loss": 0.1915,
"step": 3990
},
{
"epoch": 45.33,
"learning_rate": 4.070842751529025e-07,
"loss": 0.2004,
"step": 4000
},
{
"epoch": 45.44,
"learning_rate": 3.871643990758056e-07,
"loss": 0.1947,
"step": 4010
},
{
"epoch": 45.55,
"learning_rate": 3.6773457713868423e-07,
"loss": 0.1793,
"step": 4020
},
{
"epoch": 45.67,
"learning_rate": 3.487957998572511e-07,
"loss": 0.1882,
"step": 4030
},
{
"epoch": 45.78,
"learning_rate": 3.3034903271417564e-07,
"loss": 0.1862,
"step": 4040
},
{
"epoch": 45.89,
"learning_rate": 3.1239521610987757e-07,
"loss": 0.1862,
"step": 4050
},
{
"epoch": 46.01,
"learning_rate": 2.949352653145754e-07,
"loss": 0.1987,
"step": 4060
},
{
"epoch": 46.12,
"learning_rate": 2.779700704216337e-07,
"loss": 0.1837,
"step": 4070
},
{
"epoch": 46.23,
"learning_rate": 2.6150049630218277e-07,
"loss": 0.1949,
"step": 4080
},
{
"epoch": 46.35,
"learning_rate": 2.4552738256102717e-07,
"loss": 0.1892,
"step": 4090
},
{
"epoch": 46.46,
"learning_rate": 2.3005154349385106e-07,
"loss": 0.1959,
"step": 4100
},
{
"epoch": 46.57,
"learning_rate": 2.1507376804569935e-07,
"loss": 0.1925,
"step": 4110
},
{
"epoch": 46.69,
"learning_rate": 2.0059481977075523e-07,
"loss": 0.1933,
"step": 4120
},
{
"epoch": 46.8,
"learning_rate": 1.8661543679342365e-07,
"loss": 0.2049,
"step": 4130
},
{
"epoch": 46.91,
"learning_rate": 1.731363317706969e-07,
"loss": 0.1756,
"step": 4140
},
{
"epoch": 47.03,
"learning_rate": 1.601581918558237e-07,
"loss": 0.1746,
"step": 4150
},
{
"epoch": 47.14,
"learning_rate": 1.4768167866328176e-07,
"loss": 0.1963,
"step": 4160
},
{
"epoch": 47.25,
"learning_rate": 1.357074282350457e-07,
"loss": 0.1903,
"step": 4170
},
{
"epoch": 47.37,
"learning_rate": 1.2423605100816304e-07,
"loss": 0.1821,
"step": 4180
},
{
"epoch": 47.48,
"learning_rate": 1.1326813178363927e-07,
"loss": 0.1946,
"step": 4190
},
{
"epoch": 47.59,
"learning_rate": 1.0280422969661696e-07,
"loss": 0.1832,
"step": 4200
},
{
"epoch": 47.71,
"learning_rate": 9.284487818787879e-08,
"loss": 0.1892,
"step": 4210
},
{
"epoch": 47.82,
"learning_rate": 8.33905849766481e-08,
"loss": 0.1883,
"step": 4220
},
{
"epoch": 47.93,
"learning_rate": 7.444183203471067e-08,
"loss": 0.1921,
"step": 4230
},
{
"epoch": 48.05,
"learning_rate": 6.599907556184115e-08,
"loss": 0.2003,
"step": 4240
},
{
"epoch": 48.16,
"learning_rate": 5.806274596254491e-08,
"loss": 0.1789,
"step": 4250
},
{
"epoch": 48.27,
"learning_rate": 5.0633247824118936e-08,
"loss": 0.1853,
"step": 4260
},
{
"epoch": 48.39,
"learning_rate": 4.371095989602614e-08,
"loss": 0.1968,
"step": 4270
},
{
"epoch": 48.5,
"learning_rate": 3.729623507058744e-08,
"loss": 0.2019,
"step": 4280
},
{
"epoch": 48.61,
"learning_rate": 3.13894003649895e-08,
"loss": 0.1808,
"step": 4290
},
{
"epoch": 48.73,
"learning_rate": 2.5990756904614723e-08,
"loss": 0.1959,
"step": 4300
},
{
"epoch": 48.84,
"learning_rate": 2.1100579907691322e-08,
"loss": 0.1781,
"step": 4310
},
{
"epoch": 48.95,
"learning_rate": 1.6719118671262302e-08,
"loss": 0.1868,
"step": 4320
},
{
"epoch": 49.07,
"learning_rate": 1.2846596558473424e-08,
"loss": 0.1887,
"step": 4330
},
{
"epoch": 49.18,
"learning_rate": 9.483210987193404e-09,
"loss": 0.1982,
"step": 4340
},
{
"epoch": 49.29,
"learning_rate": 6.62913341994087e-09,
"loss": 0.1957,
"step": 4350
},
{
"epoch": 49.41,
"learning_rate": 4.284509355151345e-09,
"loss": 0.1925,
"step": 4360
},
{
"epoch": 49.52,
"learning_rate": 2.449458319754294e-09,
"loss": 0.1859,
"step": 4370
},
{
"epoch": 49.63,
"learning_rate": 1.124073863082442e-09,
"loss": 0.187,
"step": 4380
},
{
"epoch": 49.75,
"learning_rate": 3.0842355210336515e-10,
"loss": 0.1792,
"step": 4390
},
{
"epoch": 49.86,
"learning_rate": 2.548967970028571e-12,
"loss": 0.1895,
"step": 4400
}
],
"max_steps": 4400,
"num_train_epochs": 50,
"total_flos": 6.496431653421466e+17,
"trial_name": null,
"trial_params": null
}