{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.802654408616688,
"eval_steps": 500,
"global_step": 80000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.011266590053854301,
"grad_norm": 3268.47607421875,
"learning_rate": 3.125e-06,
"loss": 80243.872,
"step": 500
},
{
"epoch": 0.022533180107708602,
"grad_norm": 1101.09423828125,
"learning_rate": 6.25e-06,
"loss": 455.9426,
"step": 1000
},
{
"epoch": 0.0337997701615629,
"grad_norm": 1345.1741943359375,
"learning_rate": 9.375000000000001e-06,
"loss": 398.2565,
"step": 1500
},
{
"epoch": 0.045066360215417205,
"grad_norm": 957.8522338867188,
"learning_rate": 1.25e-05,
"loss": 361.662,
"step": 2000
},
{
"epoch": 0.0563329502692715,
"grad_norm": 1153.3565673828125,
"learning_rate": 1.5625e-05,
"loss": 351.6057,
"step": 2500
},
{
"epoch": 0.0675995403231258,
"grad_norm": 3220.947021484375,
"learning_rate": 1.8750000000000002e-05,
"loss": 324.4043,
"step": 3000
},
{
"epoch": 0.0788661303769801,
"grad_norm": 1318.7249755859375,
"learning_rate": 2.1875e-05,
"loss": 303.6339,
"step": 3500
},
{
"epoch": 0.09013272043083441,
"grad_norm": 1085.81982421875,
"learning_rate": 2.5e-05,
"loss": 290.8328,
"step": 4000
},
{
"epoch": 0.1013993104846887,
"grad_norm": 1732.97412109375,
"learning_rate": 2.8125000000000003e-05,
"loss": 286.8244,
"step": 4500
},
{
"epoch": 0.112665900538543,
"grad_norm": 1018.1327514648438,
"learning_rate": 3.125e-05,
"loss": 274.4126,
"step": 5000
},
{
"epoch": 0.1239324905923973,
"grad_norm": 1115.255859375,
"learning_rate": 3.4375e-05,
"loss": 258.1825,
"step": 5500
},
{
"epoch": 0.1351990806462516,
"grad_norm": 2264.675537109375,
"learning_rate": 3.7500000000000003e-05,
"loss": 249.4162,
"step": 6000
},
{
"epoch": 0.1464656707001059,
"grad_norm": 1202.668212890625,
"learning_rate": 4.0625000000000005e-05,
"loss": 238.756,
"step": 6500
},
{
"epoch": 0.1577322607539602,
"grad_norm": 1271.8997802734375,
"learning_rate": 4.375e-05,
"loss": 234.6415,
"step": 7000
},
{
"epoch": 0.16899885080781452,
"grad_norm": 880.48291015625,
"learning_rate": 4.6875e-05,
"loss": 220.2277,
"step": 7500
},
{
"epoch": 0.18026544086166882,
"grad_norm": 1199.511962890625,
"learning_rate": 5e-05,
"loss": 212.284,
"step": 8000
},
{
"epoch": 0.19153203091552312,
"grad_norm": 885.7015991210938,
"learning_rate": 4.999405067699773e-05,
"loss": 197.9255,
"step": 8500
},
{
"epoch": 0.2027986209693774,
"grad_norm": 1130.3941650390625,
"learning_rate": 4.997620553954645e-05,
"loss": 201.8366,
"step": 9000
},
{
"epoch": 0.2140652110232317,
"grad_norm": 1032.0145263671875,
"learning_rate": 4.994647308096509e-05,
"loss": 195.4686,
"step": 9500
},
{
"epoch": 0.225331801077086,
"grad_norm": 1835.482666015625,
"learning_rate": 4.990486745229364e-05,
"loss": 186.7062,
"step": 10000
},
{
"epoch": 0.2365983911309403,
"grad_norm": 1477.0096435546875,
"learning_rate": 4.985140845555799e-05,
"loss": 188.2111,
"step": 10500
},
{
"epoch": 0.2478649811847946,
"grad_norm": 688.68310546875,
"learning_rate": 4.9786121534345265e-05,
"loss": 180.3121,
"step": 11000
},
{
"epoch": 0.2591315712386489,
"grad_norm": 1981.5882568359375,
"learning_rate": 4.970903776169402e-05,
"loss": 171.624,
"step": 11500
},
{
"epoch": 0.2703981612925032,
"grad_norm": 1986.2034912109375,
"learning_rate": 4.962019382530521e-05,
"loss": 178.433,
"step": 12000
},
{
"epoch": 0.2816647513463575,
"grad_norm": 1576.7353515625,
"learning_rate": 4.951963201008076e-05,
"loss": 164.5357,
"step": 12500
},
{
"epoch": 0.2929313414002118,
"grad_norm": 1520.091796875,
"learning_rate": 4.940740017799833e-05,
"loss": 162.9244,
"step": 13000
},
{
"epoch": 0.3041979314540661,
"grad_norm": 1361.819091796875,
"learning_rate": 4.9283551745331534e-05,
"loss": 164.2202,
"step": 13500
},
{
"epoch": 0.3154645215079204,
"grad_norm": 1085.12548828125,
"learning_rate": 4.914814565722671e-05,
"loss": 164.809,
"step": 14000
},
{
"epoch": 0.3267311115617747,
"grad_norm": 1452.0218505859375,
"learning_rate": 4.9001246359648224e-05,
"loss": 162.6041,
"step": 14500
},
{
"epoch": 0.33799770161562903,
"grad_norm": 1728.5352783203125,
"learning_rate": 4.884292376870567e-05,
"loss": 156.4712,
"step": 15000
},
{
"epoch": 0.3492642916694833,
"grad_norm": 1888.999267578125,
"learning_rate": 4.867325323737765e-05,
"loss": 151.6581,
"step": 15500
},
{
"epoch": 0.36053088172333764,
"grad_norm": 1127.0830078125,
"learning_rate": 4.849231551964771e-05,
"loss": 154.7625,
"step": 16000
},
{
"epoch": 0.3717974717771919,
"grad_norm": 1579.301513671875,
"learning_rate": 4.830019673206997e-05,
"loss": 158.3963,
"step": 16500
},
{
"epoch": 0.38306406183104624,
"grad_norm": 1466.5936279296875,
"learning_rate": 4.8096988312782174e-05,
"loss": 151.9632,
"step": 17000
},
{
"epoch": 0.3943306518849005,
"grad_norm": 1501.413330078125,
"learning_rate": 4.788278697798618e-05,
"loss": 152.0479,
"step": 17500
},
{
"epoch": 0.4055972419387548,
"grad_norm": 938.9967651367188,
"learning_rate": 4.765769467591625e-05,
"loss": 147.7795,
"step": 18000
},
{
"epoch": 0.4168638319926091,
"grad_norm": 1407.6708984375,
"learning_rate": 4.742181853831721e-05,
"loss": 145.714,
"step": 18500
},
{
"epoch": 0.4281304220464634,
"grad_norm": 1046.4781494140625,
"learning_rate": 4.717527082945554e-05,
"loss": 147.87,
"step": 19000
},
{
"epoch": 0.43939701210031773,
"grad_norm": 1437.9764404296875,
"learning_rate": 4.69181688926877e-05,
"loss": 139.7023,
"step": 19500
},
{
"epoch": 0.450663602154172,
"grad_norm": 1155.10595703125,
"learning_rate": 4.665063509461097e-05,
"loss": 146.1232,
"step": 20000
},
{
"epoch": 0.46193019220802634,
"grad_norm": 1317.321044921875,
"learning_rate": 4.637279676682367e-05,
"loss": 139.9448,
"step": 20500
},
{
"epoch": 0.4731967822618806,
"grad_norm": 1005.6251831054688,
"learning_rate": 4.608478614532215e-05,
"loss": 142.7613,
"step": 21000
},
{
"epoch": 0.48446337231573494,
"grad_norm": 2557.992919921875,
"learning_rate": 4.5786740307563636e-05,
"loss": 139.5429,
"step": 21500
},
{
"epoch": 0.4957299623695892,
"grad_norm": 1912.8707275390625,
"learning_rate": 4.54788011072248e-05,
"loss": 138.4188,
"step": 22000
},
{
"epoch": 0.5069965524234435,
"grad_norm": 1349.5655517578125,
"learning_rate": 4.516111510668707e-05,
"loss": 135.984,
"step": 22500
},
{
"epoch": 0.5182631424772978,
"grad_norm": 2255.250732421875,
"learning_rate": 4.4833833507280884e-05,
"loss": 136.2522,
"step": 23000
},
{
"epoch": 0.5295297325311521,
"grad_norm": 947.2132568359375,
"learning_rate": 4.4497112077322044e-05,
"loss": 135.873,
"step": 23500
},
{
"epoch": 0.5407963225850064,
"grad_norm": 1092.2021484375,
"learning_rate": 4.415111107797445e-05,
"loss": 136.0968,
"step": 24000
},
{
"epoch": 0.5520629126388608,
"grad_norm": 2551.856201171875,
"learning_rate": 4.379599518697444e-05,
"loss": 133.4127,
"step": 24500
},
{
"epoch": 0.563329502692715,
"grad_norm": 1087.750732421875,
"learning_rate": 4.34319334202531e-05,
"loss": 137.0111,
"step": 25000
},
{
"epoch": 0.5745960927465693,
"grad_norm": 710.9840698242188,
"learning_rate": 4.305909905149389e-05,
"loss": 128.7961,
"step": 25500
},
{
"epoch": 0.5858626828004236,
"grad_norm": 1028.0732421875,
"learning_rate": 4.267766952966369e-05,
"loss": 131.5538,
"step": 26000
},
{
"epoch": 0.597129272854278,
"grad_norm": 1650.9874267578125,
"learning_rate": 4.228782639455674e-05,
"loss": 131.9968,
"step": 26500
},
{
"epoch": 0.6083958629081322,
"grad_norm": 3326.53564453125,
"learning_rate": 4.188975519039151e-05,
"loss": 129.6315,
"step": 27000
},
{
"epoch": 0.6196624529619865,
"grad_norm": 2218.9794921875,
"learning_rate": 4.148364537750172e-05,
"loss": 126.7289,
"step": 27500
},
{
"epoch": 0.6309290430158409,
"grad_norm": 1092.4063720703125,
"learning_rate": 4.1069690242163484e-05,
"loss": 131.7434,
"step": 28000
},
{
"epoch": 0.6421956330696951,
"grad_norm": 1642.626220703125,
"learning_rate": 4.064808680460148e-05,
"loss": 131.2289,
"step": 28500
},
{
"epoch": 0.6534622231235494,
"grad_norm": 2752.57470703125,
"learning_rate": 4.021903572521802e-05,
"loss": 125.4669,
"step": 29000
},
{
"epoch": 0.6647288131774037,
"grad_norm": 2134.545654296875,
"learning_rate": 3.978274120908956e-05,
"loss": 128.8268,
"step": 29500
},
{
"epoch": 0.6759954032312581,
"grad_norm": 1844.7005615234375,
"learning_rate": 3.933941090877615e-05,
"loss": 126.0543,
"step": 30000
},
{
"epoch": 0.6872619932851123,
"grad_norm": 911.3765869140625,
"learning_rate": 3.888925582549006e-05,
"loss": 124.5692,
"step": 30500
},
{
"epoch": 0.6985285833389666,
"grad_norm": 1240.784423828125,
"learning_rate": 3.84324902086706e-05,
"loss": 129.9127,
"step": 31000
},
{
"epoch": 0.7097951733928209,
"grad_norm": 1387.1654052734375,
"learning_rate": 3.796933145401304e-05,
"loss": 128.229,
"step": 31500
},
{
"epoch": 0.7210617634466753,
"grad_norm": 5207.958984375,
"learning_rate": 3.7500000000000003e-05,
"loss": 123.6102,
"step": 32000
},
{
"epoch": 0.7323283535005295,
"grad_norm": 1478.238525390625,
"learning_rate": 3.702471922298469e-05,
"loss": 122.5027,
"step": 32500
},
{
"epoch": 0.7435949435543838,
"grad_norm": 1219.868408203125,
"learning_rate": 3.654371533087586e-05,
"loss": 121.6549,
"step": 33000
},
{
"epoch": 0.7548615336082382,
"grad_norm": 1724.080078125,
"learning_rate": 3.6057217255475034e-05,
"loss": 122.2066,
"step": 33500
},
{
"epoch": 0.7661281236620925,
"grad_norm": 2315.779052734375,
"learning_rate": 3.556545654351749e-05,
"loss": 124.5871,
"step": 34000
},
{
"epoch": 0.7773947137159467,
"grad_norm": 1415.927978515625,
"learning_rate": 3.5068667246468436e-05,
"loss": 119.864,
"step": 34500
},
{
"epoch": 0.788661303769801,
"grad_norm": 2292.79736328125,
"learning_rate": 3.456708580912725e-05,
"loss": 124.4397,
"step": 35000
},
{
"epoch": 0.7999278938236554,
"grad_norm": 2198.923583984375,
"learning_rate": 3.406095095709254e-05,
"loss": 119.8706,
"step": 35500
},
{
"epoch": 0.8111944838775096,
"grad_norm": 774.8341064453125,
"learning_rate": 3.355050358314172e-05,
"loss": 122.3335,
"step": 36000
},
{
"epoch": 0.8224610739313639,
"grad_norm": 1356.7291259765625,
"learning_rate": 3.303598663257904e-05,
"loss": 117.6119,
"step": 36500
},
{
"epoch": 0.8337276639852182,
"grad_norm": 0.0,
"learning_rate": 3.251764498760683e-05,
"loss": 122.5679,
"step": 37000
},
{
"epoch": 0.8449942540390726,
"grad_norm": 0.0,
"learning_rate": 3.1995725350774806e-05,
"loss": 116.4365,
"step": 37500
},
{
"epoch": 0.8562608440929268,
"grad_norm": 1365.280517578125,
"learning_rate": 3.147047612756302e-05,
"loss": 119.871,
"step": 38000
},
{
"epoch": 0.8675274341467811,
"grad_norm": 1635.288330078125,
"learning_rate": 3.094214730815433e-05,
"loss": 116.4286,
"step": 38500
},
{
"epoch": 0.8787940242006355,
"grad_norm": 1430.511962890625,
"learning_rate": 3.0410990348452573e-05,
"loss": 113.9206,
"step": 39000
},
{
"epoch": 0.8900606142544898,
"grad_norm": 1990.450927734375,
"learning_rate": 2.9877258050403212e-05,
"loss": 118.977,
"step": 39500
},
{
"epoch": 0.901327204308344,
"grad_norm": 875.0726928710938,
"learning_rate": 2.9341204441673266e-05,
"loss": 114.4106,
"step": 40000
},
{
"epoch": 0.9125937943621983,
"grad_norm": 1655.7935791015625,
"learning_rate": 2.8803084654747918e-05,
"loss": 115.3111,
"step": 40500
},
{
"epoch": 0.9238603844160527,
"grad_norm": 1956.72216796875,
"learning_rate": 2.8263154805501297e-05,
"loss": 117.3135,
"step": 41000
},
{
"epoch": 0.9351269744699069,
"grad_norm": 937.9488525390625,
"learning_rate": 2.7721671871299116e-05,
"loss": 116.4852,
"step": 41500
},
{
"epoch": 0.9463935645237612,
"grad_norm": 1579.9736328125,
"learning_rate": 2.717889356869146e-05,
"loss": 118.1533,
"step": 42000
},
{
"epoch": 0.9576601545776156,
"grad_norm": 1284.749755859375,
"learning_rate": 2.663507823075358e-05,
"loss": 113.1541,
"step": 42500
},
{
"epoch": 0.9689267446314699,
"grad_norm": 1050.632080078125,
"learning_rate": 2.6090484684133404e-05,
"loss": 115.3209,
"step": 43000
},
{
"epoch": 0.9801933346853241,
"grad_norm": 1058.0616455078125,
"learning_rate": 2.5545372125864032e-05,
"loss": 119.323,
"step": 43500
},
{
"epoch": 0.9914599247391784,
"grad_norm": 1310.6236572265625,
"learning_rate": 2.5e-05,
"loss": 111.4601,
"step": 44000
},
{
"epoch": 1.0,
"eval_loss": 114.70393371582031,
"eval_runtime": 1385.5714,
"eval_samples_per_second": 14.238,
"eval_steps_per_second": 3.56,
"step": 44379
},
{
"epoch": 1.0027265147930327,
"grad_norm": 1699.930419921875,
"learning_rate": 2.4454627874135974e-05,
"loss": 111.6484,
"step": 44500
},
{
"epoch": 1.013993104846887,
"grad_norm": 0.0,
"learning_rate": 2.3909515315866605e-05,
"loss": 111.7967,
"step": 45000
},
{
"epoch": 1.0252596949007413,
"grad_norm": 1440.404052734375,
"learning_rate": 2.3364921769246423e-05,
"loss": 107.7449,
"step": 45500
},
{
"epoch": 1.0365262849545958,
"grad_norm": 1180.1439208984375,
"learning_rate": 2.2821106431308544e-05,
"loss": 109.0394,
"step": 46000
},
{
"epoch": 1.04779287500845,
"grad_norm": 1600.07568359375,
"learning_rate": 2.2278328128700893e-05,
"loss": 104.1056,
"step": 46500
},
{
"epoch": 1.0590594650623042,
"grad_norm": 1731.480224609375,
"learning_rate": 2.173684519449872e-05,
"loss": 106.6714,
"step": 47000
},
{
"epoch": 1.0703260551161586,
"grad_norm": 955.4271240234375,
"learning_rate": 2.1196915345252084e-05,
"loss": 105.6627,
"step": 47500
},
{
"epoch": 1.0815926451700129,
"grad_norm": 2818.5283203125,
"learning_rate": 2.0658795558326743e-05,
"loss": 106.0866,
"step": 48000
},
{
"epoch": 1.092859235223867,
"grad_norm": 806.58447265625,
"learning_rate": 2.0122741949596797e-05,
"loss": 111.0945,
"step": 48500
},
{
"epoch": 1.1041258252777215,
"grad_norm": 1397.9573974609375,
"learning_rate": 1.958900965154743e-05,
"loss": 104.8343,
"step": 49000
},
{
"epoch": 1.1153924153315757,
"grad_norm": 827.4343872070312,
"learning_rate": 1.9057852691845677e-05,
"loss": 103.2378,
"step": 49500
},
{
"epoch": 1.12665900538543,
"grad_norm": 2430.602294921875,
"learning_rate": 1.852952387243698e-05,
"loss": 107.4349,
"step": 50000
},
{
"epoch": 1.1379255954392844,
"grad_norm": 1653.0751953125,
"learning_rate": 1.80042746492252e-05,
"loss": 111.0688,
"step": 50500
},
{
"epoch": 1.1491921854931386,
"grad_norm": 1318.4361572265625,
"learning_rate": 1.7482355012393177e-05,
"loss": 107.7423,
"step": 51000
},
{
"epoch": 1.160458775546993,
"grad_norm": 1956.568603515625,
"learning_rate": 1.6964013367420966e-05,
"loss": 103.8008,
"step": 51500
},
{
"epoch": 1.1717253656008473,
"grad_norm": 764.591552734375,
"learning_rate": 1.6449496416858284e-05,
"loss": 104.8202,
"step": 52000
},
{
"epoch": 1.1829919556547015,
"grad_norm": 1741.2701416015625,
"learning_rate": 1.5939049042907462e-05,
"loss": 107.5796,
"step": 52500
},
{
"epoch": 1.194258545708556,
"grad_norm": 2550.640625,
"learning_rate": 1.5432914190872757e-05,
"loss": 101.9078,
"step": 53000
},
{
"epoch": 1.2055251357624102,
"grad_norm": 698.9768676757812,
"learning_rate": 1.4931332753531574e-05,
"loss": 108.1872,
"step": 53500
},
{
"epoch": 1.2167917258162644,
"grad_norm": 3405.810546875,
"learning_rate": 1.443454345648252e-05,
"loss": 99.6448,
"step": 54000
},
{
"epoch": 1.2280583158701188,
"grad_norm": 3267.17236328125,
"learning_rate": 1.3942782744524973e-05,
"loss": 108.0154,
"step": 54500
},
{
"epoch": 1.239324905923973,
"grad_norm": 1127.849853515625,
"learning_rate": 1.3456284669124158e-05,
"loss": 100.0244,
"step": 55000
},
{
"epoch": 1.2505914959778273,
"grad_norm": 1082.1241455078125,
"learning_rate": 1.2975280777015314e-05,
"loss": 104.4581,
"step": 55500
},
{
"epoch": 1.2618580860316817,
"grad_norm": 1860.104248046875,
"learning_rate": 1.2500000000000006e-05,
"loss": 103.1496,
"step": 56000
},
{
"epoch": 1.273124676085536,
"grad_norm": 1682.26708984375,
"learning_rate": 1.2030668545986959e-05,
"loss": 107.3074,
"step": 56500
},
{
"epoch": 1.2843912661393904,
"grad_norm": 616.0757446289062,
"learning_rate": 1.1567509791329401e-05,
"loss": 102.4632,
"step": 57000
},
{
"epoch": 1.2956578561932446,
"grad_norm": 624.0526733398438,
"learning_rate": 1.1110744174509952e-05,
"loss": 100.0244,
"step": 57500
},
{
"epoch": 1.3069244462470988,
"grad_norm": 1387.1529541015625,
"learning_rate": 1.0660589091223855e-05,
"loss": 105.9446,
"step": 58000
},
{
"epoch": 1.318191036300953,
"grad_norm": 1828.0877685546875,
"learning_rate": 1.0217258790910448e-05,
"loss": 104.6986,
"step": 58500
},
{
"epoch": 1.3294576263548075,
"grad_norm": 650.5953369140625,
"learning_rate": 9.780964274781984e-06,
"loss": 105.4167,
"step": 59000
},
{
"epoch": 1.3407242164086617,
"grad_norm": 996.328125,
"learning_rate": 9.351913195398524e-06,
"loss": 103.2462,
"step": 59500
},
{
"epoch": 1.3519908064625161,
"grad_norm": 1035.3780517578125,
"learning_rate": 8.930309757836517e-06,
"loss": 103.7156,
"step": 60000
},
{
"epoch": 1.3632573965163703,
"grad_norm": 1865.9571533203125,
"learning_rate": 8.51635462249828e-06,
"loss": 105.3619,
"step": 60500
},
{
"epoch": 1.3745239865702246,
"grad_norm": 2139.88671875,
"learning_rate": 8.110244809608495e-06,
"loss": 103.2506,
"step": 61000
},
{
"epoch": 1.385790576624079,
"grad_norm": 2942.61083984375,
"learning_rate": 7.712173605443269e-06,
"loss": 102.9886,
"step": 61500
},
{
"epoch": 1.3970571666779332,
"grad_norm": 415.38153076171875,
"learning_rate": 7.3223304703363135e-06,
"loss": 102.6045,
"step": 62000
},
{
"epoch": 1.4083237567317877,
"grad_norm": 1247.396728515625,
"learning_rate": 6.940900948506113e-06,
"loss": 102.5344,
"step": 62500
},
{
"epoch": 1.4195903467856419,
"grad_norm": 740.3623046875,
"learning_rate": 6.568066579746901e-06,
"loss": 104.4504,
"step": 63000
},
{
"epoch": 1.430856936839496,
"grad_norm": 1221.581298828125,
"learning_rate": 6.204004813025568e-06,
"loss": 101.7631,
"step": 63500
},
{
"epoch": 1.4421235268933503,
"grad_norm": 1514.0115966796875,
"learning_rate": 5.848888922025553e-06,
"loss": 99.2285,
"step": 64000
},
{
"epoch": 1.4533901169472048,
"grad_norm": 821.0801391601562,
"learning_rate": 5.50288792267796e-06,
"loss": 102.3846,
"step": 64500
},
{
"epoch": 1.464656707001059,
"grad_norm": 1313.3104248046875,
"learning_rate": 5.166166492719124e-06,
"loss": 103.6035,
"step": 65000
},
{
"epoch": 1.4759232970549134,
"grad_norm": 1329.42919921875,
"learning_rate": 4.8388848933129335e-06,
"loss": 104.2133,
"step": 65500
},
{
"epoch": 1.4871898871087676,
"grad_norm": 3106.43505859375,
"learning_rate": 4.521198892775203e-06,
"loss": 100.1638,
"step": 66000
},
{
"epoch": 1.4984564771626219,
"grad_norm": 996.626220703125,
"learning_rate": 4.213259692436367e-06,
"loss": 97.5407,
"step": 66500
},
{
"epoch": 1.5097230672164763,
"grad_norm": 1821.3323974609375,
"learning_rate": 3.9152138546778625e-06,
"loss": 102.4095,
"step": 67000
},
{
"epoch": 1.5209896572703305,
"grad_norm": 1764.0323486328125,
"learning_rate": 3.6272032331763408e-06,
"loss": 96.4079,
"step": 67500
},
{
"epoch": 1.532256247324185,
"grad_norm": 1377.978271484375,
"learning_rate": 3.3493649053890326e-06,
"loss": 104.5846,
"step": 68000
},
{
"epoch": 1.5435228373780392,
"grad_norm": 1291.5908203125,
"learning_rate": 3.081831107312308e-06,
"loss": 100.9904,
"step": 68500
},
{
"epoch": 1.5547894274318934,
"grad_norm": 2216.6796875,
"learning_rate": 2.8247291705444575e-06,
"loss": 99.7059,
"step": 69000
},
{
"epoch": 1.5660560174857476,
"grad_norm": 1271.5609130859375,
"learning_rate": 2.578181461682794e-06,
"loss": 99.0976,
"step": 69500
},
{
"epoch": 1.577322607539602,
"grad_norm": 2712.908203125,
"learning_rate": 2.3423053240837515e-06,
"loss": 99.1392,
"step": 70000
},
{
"epoch": 1.5885891975934565,
"grad_norm": 1653.12890625,
"learning_rate": 2.1172130220138226e-06,
"loss": 101.0942,
"step": 70500
},
{
"epoch": 1.5998557876473107,
"grad_norm": 1816.012451171875,
"learning_rate": 1.9030116872178316e-06,
"loss": 99.6448,
"step": 71000
},
{
"epoch": 1.611122377701165,
"grad_norm": 1233.8719482421875,
"learning_rate": 1.6998032679300391e-06,
"loss": 101.785,
"step": 71500
},
{
"epoch": 1.6223889677550192,
"grad_norm": 988.7132568359375,
"learning_rate": 1.5076844803522922e-06,
"loss": 95.7829,
"step": 72000
},
{
"epoch": 1.6336555578088736,
"grad_norm": 1261.22021484375,
"learning_rate": 1.3267467626223606e-06,
"loss": 105.201,
"step": 72500
},
{
"epoch": 1.6449221478627278,
"grad_norm": 934.3240966796875,
"learning_rate": 1.1570762312943295e-06,
"loss": 98.9473,
"step": 73000
},
{
"epoch": 1.6561887379165823,
"grad_norm": 664.5089111328125,
"learning_rate": 9.98753640351785e-07,
"loss": 95.5199,
"step": 73500
},
{
"epoch": 1.6674553279704365,
"grad_norm": 1963.7371826171875,
"learning_rate": 8.51854342773295e-07,
"loss": 101.1854,
"step": 74000
},
{
"epoch": 1.6787219180242907,
"grad_norm": 2443.130126953125,
"learning_rate": 7.164482546684642e-07,
"loss": 97.2825,
"step": 74500
},
{
"epoch": 1.689988508078145,
"grad_norm": 1137.326416015625,
"learning_rate": 5.925998220016659e-07,
"loss": 100.3116,
"step": 75000
},
{
"epoch": 1.7012550981319994,
"grad_norm": 1520.0399169921875,
"learning_rate": 4.803679899192392e-07,
"loss": 100.4226,
"step": 75500
},
{
"epoch": 1.7125216881858538,
"grad_norm": 1065.4630126953125,
"learning_rate": 3.7980617469479953e-07,
"loss": 101.6046,
"step": 76000
},
{
"epoch": 1.723788278239708,
"grad_norm": 1042.942138671875,
"learning_rate": 2.909622383059835e-07,
"loss": 99.1135,
"step": 76500
},
{
"epoch": 1.7350548682935623,
"grad_norm": 841.5830078125,
"learning_rate": 2.1387846565474045e-07,
"loss": 101.7611,
"step": 77000
},
{
"epoch": 1.7463214583474165,
"grad_norm": 3369.8330078125,
"learning_rate": 1.4859154444200884e-07,
"loss": 99.4417,
"step": 77500
},
{
"epoch": 1.757588048401271,
"grad_norm": 2432.385498046875,
"learning_rate": 9.513254770636137e-08,
"loss": 100.2592,
"step": 78000
},
{
"epoch": 1.7688546384551251,
"grad_norm": 1680.1993408203125,
"learning_rate": 5.352691903491303e-08,
"loss": 98.8878,
"step": 78500
},
{
"epoch": 1.7801212285089796,
"grad_norm": 682.8377075195312,
"learning_rate": 2.3794460453555047e-08,
"loss": 97.455,
"step": 79000
},
{
"epoch": 1.7913878185628338,
"grad_norm": 0.0,
"learning_rate": 5.94932300227169e-09,
"loss": 102.4157,
"step": 79500
},
{
"epoch": 1.802654408616688,
"grad_norm": 3429.1298828125,
"learning_rate": 0.0,
"loss": 97.4711,
"step": 80000
}
],
"logging_steps": 500,
"max_steps": 80000,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 20000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}