{
  "best_metric": 0.8947485685348511,
  "best_model_checkpoint": "miner_id_24/checkpoint-200",
  "epoch": 0.11860637509266123,
  "eval_steps": 50,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0005930318754633062,
      "grad_norm": 0.9128352403640747,
      "learning_rate": 1e-05,
      "loss": 1.3096,
      "step": 1
    },
    {
      "epoch": 0.0005930318754633062,
      "eval_loss": 1.4674732685089111,
      "eval_runtime": 54.584,
      "eval_samples_per_second": 52.03,
      "eval_steps_per_second": 13.007,
      "step": 1
    },
    {
      "epoch": 0.0011860637509266123,
      "grad_norm": 0.917138397693634,
      "learning_rate": 2e-05,
      "loss": 1.6473,
      "step": 2
    },
    {
      "epoch": 0.0017790956263899185,
      "grad_norm": 0.8538322448730469,
      "learning_rate": 3e-05,
      "loss": 1.6559,
      "step": 3
    },
    {
      "epoch": 0.0023721275018532247,
      "grad_norm": 0.7117102742195129,
      "learning_rate": 4e-05,
      "loss": 1.7047,
      "step": 4
    },
    {
      "epoch": 0.0029651593773165306,
      "grad_norm": 0.7061640024185181,
      "learning_rate": 5e-05,
      "loss": 1.5857,
      "step": 5
    },
    {
      "epoch": 0.003558191252779837,
      "grad_norm": 0.7815567255020142,
      "learning_rate": 6e-05,
      "loss": 1.5563,
      "step": 6
    },
    {
      "epoch": 0.004151223128243143,
      "grad_norm": 0.8941373825073242,
      "learning_rate": 7e-05,
      "loss": 1.5596,
      "step": 7
    },
    {
      "epoch": 0.004744255003706449,
      "grad_norm": 0.7239966988563538,
      "learning_rate": 8e-05,
      "loss": 1.4816,
      "step": 8
    },
    {
      "epoch": 0.005337286879169755,
      "grad_norm": 0.7085259556770325,
      "learning_rate": 9e-05,
      "loss": 1.5237,
      "step": 9
    },
    {
      "epoch": 0.005930318754633061,
      "grad_norm": 0.6864994168281555,
      "learning_rate": 0.0001,
      "loss": 1.4989,
      "step": 10
    },
    {
      "epoch": 0.006523350630096367,
      "grad_norm": 0.6545014381408691,
      "learning_rate": 9.999316524962345e-05,
      "loss": 1.509,
      "step": 11
    },
    {
      "epoch": 0.007116382505559674,
      "grad_norm": 0.700894296169281,
      "learning_rate": 9.997266286704631e-05,
      "loss": 1.3554,
      "step": 12
    },
    {
      "epoch": 0.00770941438102298,
      "grad_norm": 0.6438685059547424,
      "learning_rate": 9.993849845741524e-05,
      "loss": 1.3176,
      "step": 13
    },
    {
      "epoch": 0.008302446256486287,
      "grad_norm": 0.6856859922409058,
      "learning_rate": 9.989068136093873e-05,
      "loss": 1.3627,
      "step": 14
    },
    {
      "epoch": 0.008895478131949592,
      "grad_norm": 0.6096616983413696,
      "learning_rate": 9.98292246503335e-05,
      "loss": 1.3077,
      "step": 15
    },
    {
      "epoch": 0.009488510007412899,
      "grad_norm": 0.6056028008460999,
      "learning_rate": 9.975414512725057e-05,
      "loss": 1.3192,
      "step": 16
    },
    {
      "epoch": 0.010081541882876204,
      "grad_norm": 0.674043595790863,
      "learning_rate": 9.966546331768191e-05,
      "loss": 1.3199,
      "step": 17
    },
    {
      "epoch": 0.01067457375833951,
      "grad_norm": 0.6460352540016174,
      "learning_rate": 9.956320346634876e-05,
      "loss": 1.275,
      "step": 18
    },
    {
      "epoch": 0.011267605633802818,
      "grad_norm": 0.5855716466903687,
      "learning_rate": 9.944739353007344e-05,
      "loss": 1.1831,
      "step": 19
    },
    {
      "epoch": 0.011860637509266123,
      "grad_norm": 0.6667662262916565,
      "learning_rate": 9.931806517013612e-05,
      "loss": 1.1451,
      "step": 20
    },
    {
      "epoch": 0.01245366938472943,
      "grad_norm": 0.6556470394134521,
      "learning_rate": 9.917525374361912e-05,
      "loss": 1.1135,
      "step": 21
    },
    {
      "epoch": 0.013046701260192735,
      "grad_norm": 0.8012140393257141,
      "learning_rate": 9.901899829374047e-05,
      "loss": 1.2346,
      "step": 22
    },
    {
      "epoch": 0.013639733135656041,
      "grad_norm": 0.6600927710533142,
      "learning_rate": 9.884934153917997e-05,
      "loss": 1.1783,
      "step": 23
    },
    {
      "epoch": 0.014232765011119348,
      "grad_norm": 0.6758884191513062,
      "learning_rate": 9.86663298624003e-05,
      "loss": 1.0886,
      "step": 24
    },
    {
      "epoch": 0.014825796886582653,
      "grad_norm": 0.6599623560905457,
      "learning_rate": 9.847001329696653e-05,
      "loss": 1.1174,
      "step": 25
    },
    {
      "epoch": 0.01541882876204596,
      "grad_norm": 0.6617851257324219,
      "learning_rate": 9.826044551386744e-05,
      "loss": 1.0537,
      "step": 26
    },
    {
      "epoch": 0.016011860637509267,
      "grad_norm": 0.634557843208313,
      "learning_rate": 9.803768380684242e-05,
      "loss": 0.9785,
      "step": 27
    },
    {
      "epoch": 0.016604892512972574,
      "grad_norm": 0.701481819152832,
      "learning_rate": 9.780178907671789e-05,
      "loss": 1.0195,
      "step": 28
    },
    {
      "epoch": 0.017197924388435877,
      "grad_norm": 0.6694827079772949,
      "learning_rate": 9.755282581475769e-05,
      "loss": 1.0036,
      "step": 29
    },
    {
      "epoch": 0.017790956263899184,
      "grad_norm": 0.6416652798652649,
      "learning_rate": 9.729086208503174e-05,
      "loss": 1.0097,
      "step": 30
    },
    {
      "epoch": 0.01838398813936249,
      "grad_norm": 0.7445120811462402,
      "learning_rate": 9.701596950580806e-05,
      "loss": 0.9926,
      "step": 31
    },
    {
      "epoch": 0.018977020014825798,
      "grad_norm": 0.6441786885261536,
      "learning_rate": 9.672822322997305e-05,
      "loss": 0.9812,
      "step": 32
    },
    {
      "epoch": 0.019570051890289104,
      "grad_norm": 0.691388726234436,
      "learning_rate": 9.642770192448536e-05,
      "loss": 1.0629,
      "step": 33
    },
    {
      "epoch": 0.020163083765752408,
      "grad_norm": 0.7649052739143372,
      "learning_rate": 9.611448774886924e-05,
      "loss": 0.9518,
      "step": 34
    },
    {
      "epoch": 0.020756115641215715,
      "grad_norm": 1.0184032917022705,
      "learning_rate": 9.578866633275288e-05,
      "loss": 0.8317,
      "step": 35
    },
    {
      "epoch": 0.02134914751667902,
      "grad_norm": 0.8641113042831421,
      "learning_rate": 9.545032675245813e-05,
      "loss": 0.8625,
      "step": 36
    },
    {
      "epoch": 0.021942179392142328,
      "grad_norm": 0.950751006603241,
      "learning_rate": 9.509956150664796e-05,
      "loss": 0.9415,
      "step": 37
    },
    {
      "epoch": 0.022535211267605635,
      "grad_norm": 0.8036394715309143,
      "learning_rate": 9.473646649103818e-05,
      "loss": 0.9917,
      "step": 38
    },
    {
      "epoch": 0.02312824314306894,
      "grad_norm": 0.7395977973937988,
      "learning_rate": 9.43611409721806e-05,
      "loss": 0.9532,
      "step": 39
    },
    {
      "epoch": 0.023721275018532245,
      "grad_norm": 0.8059082627296448,
      "learning_rate": 9.397368756032445e-05,
      "loss": 0.8402,
      "step": 40
    },
    {
      "epoch": 0.024314306893995552,
      "grad_norm": 0.684325098991394,
      "learning_rate": 9.357421218136386e-05,
      "loss": 0.8513,
      "step": 41
    },
    {
      "epoch": 0.02490733876945886,
      "grad_norm": 0.8658984899520874,
      "learning_rate": 9.316282404787871e-05,
      "loss": 0.9661,
      "step": 42
    },
    {
      "epoch": 0.025500370644922166,
      "grad_norm": 1.7691848278045654,
      "learning_rate": 9.273963562927695e-05,
      "loss": 0.7337,
      "step": 43
    },
    {
      "epoch": 0.02609340252038547,
      "grad_norm": 0.8865183591842651,
      "learning_rate": 9.230476262104677e-05,
      "loss": 0.6731,
      "step": 44
    },
    {
      "epoch": 0.026686434395848776,
      "grad_norm": 0.7921429872512817,
      "learning_rate": 9.185832391312644e-05,
      "loss": 0.8241,
      "step": 45
    },
    {
      "epoch": 0.027279466271312083,
      "grad_norm": 0.7519988417625427,
      "learning_rate": 9.140044155740101e-05,
      "loss": 0.8407,
      "step": 46
    },
    {
      "epoch": 0.02787249814677539,
      "grad_norm": 1.0038756132125854,
      "learning_rate": 9.093124073433463e-05,
      "loss": 0.7287,
      "step": 47
    },
    {
      "epoch": 0.028465530022238696,
      "grad_norm": 1.711969256401062,
      "learning_rate": 9.045084971874738e-05,
      "loss": 0.519,
      "step": 48
    },
    {
      "epoch": 0.029058561897702003,
      "grad_norm": 1.0256000757217407,
      "learning_rate": 8.995939984474624e-05,
      "loss": 0.8058,
      "step": 49
    },
    {
      "epoch": 0.029651593773165306,
      "grad_norm": 1.2814997434616089,
      "learning_rate": 8.945702546981969e-05,
      "loss": 0.9202,
      "step": 50
    },
    {
      "epoch": 0.029651593773165306,
      "eval_loss": 1.0709905624389648,
      "eval_runtime": 54.7426,
      "eval_samples_per_second": 51.879,
      "eval_steps_per_second": 12.97,
      "step": 50
    },
    {
      "epoch": 0.030244625648628613,
      "grad_norm": 1.2627829313278198,
      "learning_rate": 8.894386393810563e-05,
      "loss": 1.368,
      "step": 51
    },
    {
      "epoch": 0.03083765752409192,
      "grad_norm": 1.0638519525527954,
      "learning_rate": 8.842005554284296e-05,
      "loss": 1.5607,
      "step": 52
    },
    {
      "epoch": 0.03143068939955523,
      "grad_norm": 0.748832643032074,
      "learning_rate": 8.788574348801675e-05,
      "loss": 1.6571,
      "step": 53
    },
    {
      "epoch": 0.032023721275018534,
      "grad_norm": 0.664341390132904,
      "learning_rate": 8.73410738492077e-05,
      "loss": 1.6136,
      "step": 54
    },
    {
      "epoch": 0.03261675315048184,
      "grad_norm": 0.553286075592041,
      "learning_rate": 8.678619553365659e-05,
      "loss": 1.4656,
      "step": 55
    },
    {
      "epoch": 0.03320978502594515,
      "grad_norm": 0.580268919467926,
      "learning_rate": 8.622126023955446e-05,
      "loss": 1.3559,
      "step": 56
    },
    {
      "epoch": 0.03380281690140845,
      "grad_norm": 0.6548421382904053,
      "learning_rate": 8.564642241456986e-05,
      "loss": 1.3577,
      "step": 57
    },
    {
      "epoch": 0.034395848776871754,
      "grad_norm": 0.5364956855773926,
      "learning_rate": 8.506183921362443e-05,
      "loss": 1.2899,
      "step": 58
    },
    {
      "epoch": 0.03498888065233506,
      "grad_norm": 0.5661771893501282,
      "learning_rate": 8.44676704559283e-05,
      "loss": 1.3188,
      "step": 59
    },
    {
      "epoch": 0.03558191252779837,
      "grad_norm": 0.5021414160728455,
      "learning_rate": 8.386407858128706e-05,
      "loss": 1.2907,
      "step": 60
    },
    {
      "epoch": 0.036174944403261675,
      "grad_norm": 0.5147365927696228,
      "learning_rate": 8.32512286056924e-05,
      "loss": 1.2875,
      "step": 61
    },
    {
      "epoch": 0.03676797627872498,
      "grad_norm": 0.5012564659118652,
      "learning_rate": 8.262928807620843e-05,
      "loss": 1.3046,
      "step": 62
    },
    {
      "epoch": 0.03736100815418829,
      "grad_norm": 0.5347183346748352,
      "learning_rate": 8.199842702516583e-05,
      "loss": 1.317,
      "step": 63
    },
    {
      "epoch": 0.037954040029651595,
      "grad_norm": 0.4898674488067627,
      "learning_rate": 8.135881792367686e-05,
      "loss": 1.2163,
      "step": 64
    },
    {
      "epoch": 0.0385470719051149,
      "grad_norm": 0.4707097113132477,
      "learning_rate": 8.07106356344834e-05,
      "loss": 1.2045,
      "step": 65
    },
    {
      "epoch": 0.03914010378057821,
      "grad_norm": 0.4987703859806061,
      "learning_rate": 8.005405736415126e-05,
      "loss": 1.187,
      "step": 66
    },
    {
      "epoch": 0.039733135656041516,
      "grad_norm": 0.48926910758018494,
      "learning_rate": 7.938926261462366e-05,
      "loss": 1.1191,
      "step": 67
    },
    {
      "epoch": 0.040326167531504815,
      "grad_norm": 0.4914247393608093,
      "learning_rate": 7.871643313414718e-05,
      "loss": 1.0986,
      "step": 68
    },
    {
      "epoch": 0.04091919940696812,
      "grad_norm": 0.461151659488678,
      "learning_rate": 7.803575286758364e-05,
      "loss": 1.0333,
      "step": 69
    },
    {
      "epoch": 0.04151223128243143,
      "grad_norm": 0.503083348274231,
      "learning_rate": 7.734740790612136e-05,
      "loss": 1.0957,
      "step": 70
    },
    {
      "epoch": 0.042105263157894736,
      "grad_norm": 0.5450732111930847,
      "learning_rate": 7.66515864363997e-05,
      "loss": 0.928,
      "step": 71
    },
    {
      "epoch": 0.04269829503335804,
      "grad_norm": 0.47438186407089233,
      "learning_rate": 7.594847868906076e-05,
      "loss": 0.9875,
      "step": 72
    },
    {
      "epoch": 0.04329132690882135,
      "grad_norm": 0.5245692729949951,
      "learning_rate": 7.52382768867422e-05,
      "loss": 1.0826,
      "step": 73
    },
    {
      "epoch": 0.043884358784284656,
      "grad_norm": 0.6992484927177429,
      "learning_rate": 7.452117519152542e-05,
      "loss": 0.9841,
      "step": 74
    },
    {
      "epoch": 0.04447739065974796,
      "grad_norm": 0.5221351981163025,
      "learning_rate": 7.379736965185368e-05,
      "loss": 0.8991,
      "step": 75
    },
    {
      "epoch": 0.04507042253521127,
      "grad_norm": 0.4883466362953186,
      "learning_rate": 7.30670581489344e-05,
      "loss": 0.8717,
      "step": 76
    },
    {
      "epoch": 0.04566345441067458,
      "grad_norm": 0.5207772850990295,
      "learning_rate": 7.233044034264034e-05,
      "loss": 0.9564,
      "step": 77
    },
    {
      "epoch": 0.04625648628613788,
      "grad_norm": 0.5393484234809875,
      "learning_rate": 7.158771761692464e-05,
      "loss": 0.8084,
      "step": 78
    },
    {
      "epoch": 0.046849518161601184,
      "grad_norm": 0.5158237814903259,
      "learning_rate": 7.083909302476453e-05,
      "loss": 0.8711,
      "step": 79
    },
    {
      "epoch": 0.04744255003706449,
      "grad_norm": 0.5125463604927063,
      "learning_rate": 7.008477123264848e-05,
      "loss": 0.8791,
      "step": 80
    },
    {
      "epoch": 0.0480355819125278,
      "grad_norm": 0.5904987454414368,
      "learning_rate": 6.932495846462261e-05,
      "loss": 0.7699,
      "step": 81
    },
    {
      "epoch": 0.048628613787991104,
      "grad_norm": 0.5575411915779114,
      "learning_rate": 6.855986244591104e-05,
      "loss": 0.8107,
      "step": 82
    },
    {
      "epoch": 0.04922164566345441,
      "grad_norm": 0.5736557245254517,
      "learning_rate": 6.778969234612584e-05,
      "loss": 0.9037,
      "step": 83
    },
    {
      "epoch": 0.04981467753891772,
      "grad_norm": 0.6238594651222229,
      "learning_rate": 6.701465872208216e-05,
      "loss": 0.8845,
      "step": 84
    },
    {
      "epoch": 0.050407709414381024,
      "grad_norm": 0.6680272221565247,
      "learning_rate": 6.623497346023418e-05,
      "loss": 0.7974,
      "step": 85
    },
    {
      "epoch": 0.05100074128984433,
      "grad_norm": 1.050567626953125,
      "learning_rate": 6.545084971874738e-05,
      "loss": 0.6855,
      "step": 86
    },
    {
      "epoch": 0.05159377316530764,
      "grad_norm": 0.6209374666213989,
      "learning_rate": 6.466250186922325e-05,
      "loss": 0.961,
      "step": 87
    },
    {
      "epoch": 0.05218680504077094,
      "grad_norm": 0.6250402927398682,
      "learning_rate": 6.387014543809223e-05,
      "loss": 0.8688,
      "step": 88
    },
    {
      "epoch": 0.052779836916234245,
      "grad_norm": 0.5850189924240112,
      "learning_rate": 6.307399704769099e-05,
      "loss": 0.8454,
      "step": 89
    },
    {
      "epoch": 0.05337286879169755,
      "grad_norm": 0.5783194899559021,
      "learning_rate": 6.227427435703997e-05,
      "loss": 0.7482,
      "step": 90
    },
    {
      "epoch": 0.05396590066716086,
      "grad_norm": 0.6094998121261597,
      "learning_rate": 6.147119600233758e-05,
      "loss": 0.8129,
      "step": 91
    },
    {
      "epoch": 0.054558932542624165,
      "grad_norm": 0.9608752727508545,
      "learning_rate": 6.066498153718735e-05,
      "loss": 0.6524,
      "step": 92
    },
    {
      "epoch": 0.05515196441808747,
      "grad_norm": 0.7261383533477783,
      "learning_rate": 5.985585137257401e-05,
      "loss": 0.7844,
      "step": 93
    },
    {
      "epoch": 0.05574499629355078,
      "grad_norm": 0.6840940713882446,
      "learning_rate": 5.90440267166055e-05,
      "loss": 0.7562,
      "step": 94
    },
    {
      "epoch": 0.056338028169014086,
      "grad_norm": 0.6493006348609924,
      "learning_rate": 5.8229729514036705e-05,
      "loss": 0.8178,
      "step": 95
    },
    {
      "epoch": 0.05693106004447739,
      "grad_norm": 0.665821373462677,
      "learning_rate": 5.74131823855921e-05,
      "loss": 0.7347,
      "step": 96
    },
    {
      "epoch": 0.0575240919199407,
      "grad_norm": 1.3736786842346191,
      "learning_rate": 5.6594608567103456e-05,
      "loss": 0.2814,
      "step": 97
    },
    {
      "epoch": 0.058117123795404006,
      "grad_norm": 1.1150555610656738,
      "learning_rate": 5.577423184847932e-05,
      "loss": 0.2965,
      "step": 98
    },
    {
      "epoch": 0.058710155670867306,
      "grad_norm": 0.791911780834198,
      "learning_rate": 5.495227651252315e-05,
      "loss": 0.6964,
      "step": 99
    },
    {
      "epoch": 0.05930318754633061,
      "grad_norm": 0.8627227544784546,
      "learning_rate": 5.4128967273616625e-05,
      "loss": 0.6942,
      "step": 100
    },
    {
      "epoch": 0.05930318754633061,
      "eval_loss": 0.9763190746307373,
      "eval_runtime": 54.8056,
      "eval_samples_per_second": 51.819,
      "eval_steps_per_second": 12.955,
      "step": 100
    },
    {
      "epoch": 0.05989621942179392,
      "grad_norm": 0.8971495032310486,
      "learning_rate": 5.330452921628497e-05,
      "loss": 1.2981,
      "step": 101
    },
    {
      "epoch": 0.06048925129725723,
      "grad_norm": 0.788299024105072,
      "learning_rate": 5.247918773366112e-05,
      "loss": 1.4664,
      "step": 102
    },
    {
      "epoch": 0.06108228317272053,
      "grad_norm": 0.6883640289306641,
      "learning_rate": 5.165316846586541e-05,
      "loss": 1.6317,
      "step": 103
    },
    {
      "epoch": 0.06167531504818384,
      "grad_norm": 0.5831127166748047,
      "learning_rate": 5.0826697238317935e-05,
      "loss": 1.6468,
      "step": 104
    },
    {
      "epoch": 0.06226834692364715,
      "grad_norm": 0.5455831289291382,
      "learning_rate": 5e-05,
      "loss": 1.5163,
      "step": 105
    },
    {
      "epoch": 0.06286137879911045,
      "grad_norm": 0.46923744678497314,
      "learning_rate": 4.917330276168208e-05,
      "loss": 1.3974,
      "step": 106
    },
    {
      "epoch": 0.06345441067457376,
      "grad_norm": 0.5191442370414734,
      "learning_rate": 4.834683153413459e-05,
      "loss": 1.3646,
      "step": 107
    },
    {
      "epoch": 0.06404744255003707,
      "grad_norm": 0.5451369881629944,
      "learning_rate": 4.7520812266338885e-05,
      "loss": 1.3397,
      "step": 108
    },
    {
      "epoch": 0.06464047442550037,
      "grad_norm": 0.4729631841182709,
      "learning_rate": 4.669547078371504e-05,
      "loss": 1.2549,
      "step": 109
    },
    {
      "epoch": 0.06523350630096368,
      "grad_norm": 0.58608078956604,
      "learning_rate": 4.5871032726383386e-05,
      "loss": 1.2826,
      "step": 110
    },
    {
      "epoch": 0.06582653817642699,
      "grad_norm": 0.5048196315765381,
      "learning_rate": 4.504772348747687e-05,
      "loss": 1.3105,
      "step": 111
    },
    {
      "epoch": 0.0664195700518903,
      "grad_norm": 0.47246071696281433,
      "learning_rate": 4.4225768151520694e-05,
      "loss": 1.1458,
      "step": 112
    },
    {
      "epoch": 0.0670126019273536,
      "grad_norm": 0.4854651391506195,
      "learning_rate": 4.3405391432896555e-05,
      "loss": 1.0838,
      "step": 113
    },
    {
      "epoch": 0.0676056338028169,
      "grad_norm": 0.4831504225730896,
      "learning_rate": 4.2586817614407895e-05,
      "loss": 1.0581,
      "step": 114
    },
    {
      "epoch": 0.0681986656782802,
      "grad_norm": 0.4957878887653351,
      "learning_rate": 4.17702704859633e-05,
      "loss": 1.1557,
      "step": 115
    },
    {
      "epoch": 0.06879169755374351,
      "grad_norm": 0.5005252957344055,
      "learning_rate": 4.095597328339452e-05,
      "loss": 1.1404,
      "step": 116
    },
    {
      "epoch": 0.06938472942920682,
      "grad_norm": 0.47843456268310547,
      "learning_rate": 4.0144148627425993e-05,
      "loss": 1.0712,
      "step": 117
    },
    {
      "epoch": 0.06997776130467012,
      "grad_norm": 0.49580538272857666,
      "learning_rate": 3.933501846281267e-05,
      "loss": 1.0859,
      "step": 118
    },
    {
      "epoch": 0.07057079318013343,
      "grad_norm": 0.4846254289150238,
      "learning_rate": 3.852880399766243e-05,
      "loss": 1.0659,
      "step": 119
    },
    {
      "epoch": 0.07116382505559674,
      "grad_norm": 0.5240991115570068,
      "learning_rate": 3.772572564296005e-05,
      "loss": 1.0308,
      "step": 120
    },
    {
      "epoch": 0.07175685693106004,
      "grad_norm": 0.4797056317329407,
      "learning_rate": 3.6926002952309016e-05,
      "loss": 1.0719,
      "step": 121
    },
    {
      "epoch": 0.07234988880652335,
      "grad_norm": 0.5224462151527405,
      "learning_rate": 3.612985456190778e-05,
      "loss": 0.957,
      "step": 122
    },
    {
      "epoch": 0.07294292068198666,
      "grad_norm": 0.5027078986167908,
      "learning_rate": 3.533749813077677e-05,
      "loss": 0.9855,
      "step": 123
    },
    {
      "epoch": 0.07353595255744996,
      "grad_norm": 0.5758591890335083,
      "learning_rate": 3.4549150281252636e-05,
      "loss": 0.891,
      "step": 124
    },
    {
      "epoch": 0.07412898443291327,
      "grad_norm": 0.6827547550201416,
      "learning_rate": 3.3765026539765834e-05,
      "loss": 0.8659,
      "step": 125
    },
    {
      "epoch": 0.07472201630837658,
      "grad_norm": 0.5156536102294922,
      "learning_rate": 3.298534127791785e-05,
      "loss": 0.8322,
      "step": 126
    },
    {
      "epoch": 0.07531504818383988,
      "grad_norm": 0.5152995586395264,
      "learning_rate": 3.221030765387417e-05,
      "loss": 0.7989,
      "step": 127
    },
    {
      "epoch": 0.07590808005930319,
      "grad_norm": 0.5190814733505249,
      "learning_rate": 3.144013755408895e-05,
      "loss": 0.8023,
      "step": 128
    },
    {
      "epoch": 0.0765011119347665,
      "grad_norm": 0.5331418514251709,
      "learning_rate": 3.0675041535377405e-05,
      "loss": 0.7931,
      "step": 129
    },
    {
      "epoch": 0.0770941438102298,
      "grad_norm": 0.625654399394989,
      "learning_rate": 2.991522876735154e-05,
      "loss": 0.8473,
      "step": 130
    },
    {
      "epoch": 0.07768717568569311,
      "grad_norm": 0.5979682803153992,
      "learning_rate": 2.916090697523549e-05,
      "loss": 0.7122,
      "step": 131
    },
    {
      "epoch": 0.07828020756115642,
      "grad_norm": 0.5621092915534973,
      "learning_rate": 2.8412282383075363e-05,
      "loss": 0.7681,
      "step": 132
    },
    {
      "epoch": 0.07887323943661972,
      "grad_norm": 0.6109548211097717,
      "learning_rate": 2.766955965735968e-05,
      "loss": 0.7551,
      "step": 133
    },
    {
      "epoch": 0.07946627131208303,
      "grad_norm": 0.5191078782081604,
      "learning_rate": 2.693294185106562e-05,
      "loss": 0.7578,
      "step": 134
    },
    {
      "epoch": 0.08005930318754632,
      "grad_norm": 0.5392255783081055,
      "learning_rate": 2.6202630348146324e-05,
      "loss": 0.8482,
      "step": 135
    },
    {
      "epoch": 0.08065233506300963,
      "grad_norm": 0.7676748037338257,
      "learning_rate": 2.547882480847461e-05,
      "loss": 0.5408,
      "step": 136
    },
    {
      "epoch": 0.08124536693847294,
      "grad_norm": 0.6903252601623535,
      "learning_rate": 2.476172311325783e-05,
      "loss": 0.5913,
      "step": 137
    },
    {
      "epoch": 0.08183839881393624,
      "grad_norm": 0.5773659348487854,
      "learning_rate": 2.405152131093926e-05,
      "loss": 0.817,
      "step": 138
    },
    {
      "epoch": 0.08243143068939955,
      "grad_norm": 0.5975673794746399,
      "learning_rate": 2.3348413563600325e-05,
      "loss": 0.8512,
      "step": 139
    },
    {
      "epoch": 0.08302446256486286,
      "grad_norm": 0.551443874835968,
      "learning_rate": 2.2652592093878666e-05,
      "loss": 0.7687,
      "step": 140
    },
    {
      "epoch": 0.08361749444032616,
      "grad_norm": 0.8684813976287842,
      "learning_rate": 2.196424713241637e-05,
      "loss": 0.686,
      "step": 141
    },
    {
      "epoch": 0.08421052631578947,
      "grad_norm": 0.5953558087348938,
      "learning_rate": 2.128356686585282e-05,
      "loss": 0.7231,
      "step": 142
    },
    {
      "epoch": 0.08480355819125278,
      "grad_norm": 1.1803919076919556,
      "learning_rate": 2.061073738537635e-05,
      "loss": 0.3837,
      "step": 143
    },
    {
      "epoch": 0.08539659006671609,
      "grad_norm": 0.5375893115997314,
      "learning_rate": 1.9945942635848748e-05,
      "loss": 0.5268,
      "step": 144
    },
    {
      "epoch": 0.08598962194217939,
      "grad_norm": 0.6543303728103638,
      "learning_rate": 1.928936436551661e-05,
      "loss": 0.6599,
      "step": 145
    },
    {
      "epoch": 0.0865826538176427,
      "grad_norm": 0.5823112726211548,
      "learning_rate": 1.8641182076323148e-05,
      "loss": 0.6481,
      "step": 146
    },
    {
      "epoch": 0.087175685693106,
      "grad_norm": 0.7933789491653442,
      "learning_rate": 1.800157297483417e-05,
      "loss": 0.7557,
      "step": 147
    },
    {
      "epoch": 0.08776871756856931,
      "grad_norm": 2.7837235927581787,
      "learning_rate": 1.7370711923791567e-05,
      "loss": 0.2413,
      "step": 148
    },
    {
      "epoch": 0.08836174944403262,
      "grad_norm": 0.9221758842468262,
      "learning_rate": 1.6748771394307585e-05,
      "loss": 0.5467,
      "step": 149
    },
    {
      "epoch": 0.08895478131949593,
      "grad_norm": 0.7956274151802063,
      "learning_rate": 1.6135921418712956e-05,
      "loss": 0.6351,
      "step": 150
    },
    {
      "epoch": 0.08895478131949593,
      "eval_loss": 0.904806911945343,
      "eval_runtime": 54.6116,
      "eval_samples_per_second": 52.004,
      "eval_steps_per_second": 13.001,
      "step": 150
    },
    {
      "epoch": 0.08954781319495923,
      "grad_norm": 0.49180299043655396,
      "learning_rate": 1.553232954407171e-05,
      "loss": 1.2796,
      "step": 151
    },
    {
      "epoch": 0.09014084507042254,
      "grad_norm": 0.5233712196350098,
      "learning_rate": 1.4938160786375572e-05,
      "loss": 1.4463,
      "step": 152
    },
    {
      "epoch": 0.09073387694588585,
      "grad_norm": 0.5484112501144409,
      "learning_rate": 1.435357758543015e-05,
      "loss": 1.5206,
      "step": 153
    },
    {
      "epoch": 0.09132690882134915,
      "grad_norm": 0.6222096085548401,
      "learning_rate": 1.3778739760445552e-05,
      "loss": 1.5653,
      "step": 154
    },
    {
      "epoch": 0.09191994069681246,
      "grad_norm": 0.5047340393066406,
      "learning_rate": 1.3213804466343421e-05,
      "loss": 1.2886,
      "step": 155
    },
    {
      "epoch": 0.09251297257227575,
      "grad_norm": 0.5084788799285889,
      "learning_rate": 1.2658926150792322e-05,
      "loss": 1.3614,
      "step": 156
    },
    {
      "epoch": 0.09310600444773906,
      "grad_norm": 0.49157023429870605,
      "learning_rate": 1.2114256511983274e-05,
      "loss": 1.2741,
      "step": 157
    },
    {
      "epoch": 0.09369903632320237,
      "grad_norm": 0.45855769515037537,
      "learning_rate": 1.157994445715706e-05,
      "loss": 1.2554,
      "step": 158
    },
    {
      "epoch": 0.09429206819866567,
      "grad_norm": 0.5020678043365479,
      "learning_rate": 1.1056136061894384e-05,
      "loss": 1.169,
      "step": 159
    },
    {
      "epoch": 0.09488510007412898,
      "grad_norm": 0.46542999148368835,
      "learning_rate": 1.0542974530180327e-05,
      "loss": 1.2578,
      "step": 160
    },
    {
      "epoch": 0.09547813194959229,
      "grad_norm": 0.5047861337661743,
      "learning_rate": 1.0040600155253765e-05,
      "loss": 1.1876,
      "step": 161
    },
    {
      "epoch": 0.0960711638250556,
      "grad_norm": 0.5085510015487671,
      "learning_rate": 9.549150281252633e-06,
      "loss": 1.053,
      "step": 162
    },
    {
      "epoch": 0.0966641957005189,
      "grad_norm": 0.5247685313224792,
      "learning_rate": 9.068759265665384e-06,
      "loss": 1.1907,
      "step": 163
    },
    {
      "epoch": 0.09725722757598221,
      "grad_norm": 0.4579811096191406,
      "learning_rate": 8.599558442598998e-06,
      "loss": 1.1396,
      "step": 164
    },
    {
      "epoch": 0.09785025945144551,
      "grad_norm": 0.46387508511543274,
      "learning_rate": 8.141676086873572e-06,
      "loss": 1.1433,
      "step": 165
    },
    {
      "epoch": 0.09844329132690882,
      "grad_norm": 0.46310198307037354,
      "learning_rate": 7.695237378953223e-06,
      "loss": 1.0559,
      "step": 166
    },
    {
      "epoch": 0.09903632320237213,
      "grad_norm": 0.5656064748764038,
      "learning_rate": 7.260364370723044e-06,
      "loss": 0.984,
      "step": 167
    },
    {
      "epoch": 0.09962935507783544,
      "grad_norm": 0.4853728711605072,
      "learning_rate": 6.837175952121306e-06,
      "loss": 1.1338,
      "step": 168
    },
    {
      "epoch": 0.10022238695329874,
      "grad_norm": 0.5021612644195557,
      "learning_rate": 6.425787818636131e-06,
      "loss": 1.0033,
      "step": 169
    },
    {
      "epoch": 0.10081541882876205,
      "grad_norm": 0.479026198387146,
      "learning_rate": 6.026312439675552e-06,
      "loss": 0.9687,
      "step": 170
    },
    {
      "epoch": 0.10140845070422536,
      "grad_norm": 0.5409302711486816,
      "learning_rate": 5.6388590278194096e-06,
      "loss": 0.9316,
      "step": 171
    },
    {
      "epoch": 0.10200148257968866,
      "grad_norm": 0.48674267530441284,
      "learning_rate": 5.263533508961827e-06,
      "loss": 0.9845,
      "step": 172
    },
    {
      "epoch": 0.10259451445515197,
      "grad_norm": 0.5077137351036072,
      "learning_rate": 4.900438493352055e-06,
      "loss": 0.88,
      "step": 173
    },
    {
      "epoch": 0.10318754633061528,
      "grad_norm": 0.503619372844696,
      "learning_rate": 4.549673247541875e-06,
      "loss": 0.8173,
      "step": 174
    },
    {
      "epoch": 0.10378057820607858,
      "grad_norm": 0.5211278200149536,
      "learning_rate": 4.2113336672471245e-06,
      "loss": 0.8957,
      "step": 175
    },
    {
      "epoch": 0.10437361008154188,
      "grad_norm": 0.5747191309928894,
      "learning_rate": 3.885512251130763e-06,
      "loss": 0.8104,
      "step": 176
    },
    {
      "epoch": 0.10496664195700518,
      "grad_norm": 0.5396358370780945,
      "learning_rate": 3.5722980755146517e-06,
      "loss": 0.8349,
      "step": 177
    },
    {
      "epoch": 0.10555967383246849,
      "grad_norm": 0.5236896872520447,
      "learning_rate": 3.271776770026963e-06,
      "loss": 0.6931,
      "step": 178
    },
    {
      "epoch": 0.1061527057079318,
      "grad_norm": 0.5552003979682922,
      "learning_rate": 2.9840304941919415e-06,
      "loss": 0.7589,
      "step": 179
    },
    {
      "epoch": 0.1067457375833951,
      "grad_norm": 0.5147039294242859,
      "learning_rate": 2.7091379149682685e-06,
      "loss": 0.848,
      "step": 180
    },
    {
      "epoch": 0.10733876945885841,
      "grad_norm": 0.5422276258468628,
      "learning_rate": 2.4471741852423237e-06,
      "loss": 0.8282,
      "step": 181
    },
    {
      "epoch": 0.10793180133432172,
      "grad_norm": 0.47938427329063416,
      "learning_rate": 2.1982109232821178e-06,
      "loss": 0.7093,
      "step": 182
    },
    {
      "epoch": 0.10852483320978502,
      "grad_norm": 0.5359724164009094,
      "learning_rate": 1.962316193157593e-06,
      "loss": 0.861,
      "step": 183
    },
    {
      "epoch": 0.10911786508524833,
      "grad_norm": 0.5440419912338257,
      "learning_rate": 1.7395544861325718e-06,
      "loss": 0.6547,
      "step": 184
    },
    {
      "epoch": 0.10971089696071164,
      "grad_norm": 0.6765339970588684,
      "learning_rate": 1.5299867030334814e-06,
      "loss": 0.5238,
      "step": 185
    },
    {
      "epoch": 0.11030392883617494,
      "grad_norm": 0.6517006158828735,
      "learning_rate": 1.333670137599713e-06,
      "loss": 0.5315,
      "step": 186
    },
    {
      "epoch": 0.11089696071163825,
      "grad_norm": 0.5708439350128174,
      "learning_rate": 1.1506584608200367e-06,
      "loss": 0.7547,
      "step": 187
    },
    {
      "epoch": 0.11148999258710156,
      "grad_norm": 0.6777458786964417,
      "learning_rate": 9.810017062595322e-07,
      "loss": 0.6551,
      "step": 188
    },
    {
      "epoch": 0.11208302446256486,
      "grad_norm": 0.5782615542411804,
      "learning_rate": 8.247462563808817e-07,
      "loss": 0.7399,
      "step": 189
    },
    {
      "epoch": 0.11267605633802817,
      "grad_norm": 0.5603607296943665,
      "learning_rate": 6.819348298638839e-07,
      "loss": 0.8177,
      "step": 190
    },
    {
      "epoch": 0.11326908821349148,
      "grad_norm": 0.5594826340675354,
      "learning_rate": 5.526064699265753e-07,
      "loss": 0.7413,
      "step": 191
    },
    {
      "epoch": 0.11386212008895479,
      "grad_norm": 0.7118792533874512,
      "learning_rate": 4.367965336512403e-07,
      "loss": 0.5511,
      "step": 192
    },
    {
      "epoch": 0.11445515196441809,
      "grad_norm": 1.1664906740188599,
      "learning_rate": 3.3453668231809286e-07,
      "loss": 0.3968,
      "step": 193
    },
    {
      "epoch": 0.1150481838398814,
      "grad_norm": 0.5703531503677368,
      "learning_rate": 2.458548727494292e-07,
      "loss": 0.743,
      "step": 194
    },
    {
      "epoch": 0.1156412157153447,
      "grad_norm": 0.6180101633071899,
      "learning_rate": 1.7077534966650766e-07,
      "loss": 0.7577,
      "step": 195
    },
    {
      "epoch": 0.11623424759080801,
      "grad_norm": 0.6108247637748718,
      "learning_rate": 1.0931863906127327e-07,
      "loss": 0.7962,
      "step": 196
    },
    {
      "epoch": 0.1168272794662713,
      "grad_norm": 0.5928218960762024,
      "learning_rate": 6.150154258476315e-08,
      "loss": 0.4661,
      "step": 197
    },
    {
      "epoch": 0.11742031134173461,
      "grad_norm": 0.7264745235443115,
      "learning_rate": 2.7337132953697554e-08,
      "loss": 0.2437,
      "step": 198
    },
    {
      "epoch": 0.11801334321719792,
      "grad_norm": 0.6450815200805664,
      "learning_rate": 6.834750376549792e-09,
      "loss": 0.6115,
      "step": 199
    },
    {
      "epoch": 0.11860637509266123,
      "grad_norm": 0.8737947940826416,
      "learning_rate": 0.0,
      "loss": 0.6112,
      "step": 200
    },
    {
      "epoch": 0.11860637509266123,
      "eval_loss": 0.8947485685348511,
      "eval_runtime": 54.972,
      "eval_samples_per_second": 51.663,
      "eval_steps_per_second": 12.916,
      "step": 200
    }
  ],
  "logging_steps": 1,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.6944697731710976e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}