{
  "best_metric": 1.8002580404281616,
  "best_model_checkpoint": "miner_id_24/checkpoint-200",
  "epoch": 0.005402047375955487,
  "eval_steps": 50,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 2.7010236879777435e-05,
      "grad_norm": 2.8267626762390137,
      "learning_rate": 1.0018000000000001e-05,
      "loss": 2.4272,
      "step": 1
    },
    {
      "epoch": 2.7010236879777435e-05,
      "eval_loss": 2.406503200531006,
      "eval_runtime": 323.2701,
      "eval_samples_per_second": 48.223,
      "eval_steps_per_second": 12.058,
      "step": 1
    },
    {
      "epoch": 5.402047375955487e-05,
      "grad_norm": 4.365167140960693,
      "learning_rate": 2.0036000000000003e-05,
      "loss": 2.3437,
      "step": 2
    },
    {
      "epoch": 8.103071063933231e-05,
      "grad_norm": 4.1403656005859375,
      "learning_rate": 3.0054e-05,
      "loss": 1.9492,
      "step": 3
    },
    {
      "epoch": 0.00010804094751910974,
      "grad_norm": 4.30712890625,
      "learning_rate": 4.0072000000000005e-05,
      "loss": 2.1199,
      "step": 4
    },
    {
      "epoch": 0.00013505118439888718,
      "grad_norm": 3.849486827850342,
      "learning_rate": 5.009e-05,
      "loss": 2.1014,
      "step": 5
    },
    {
      "epoch": 0.00016206142127866461,
      "grad_norm": 4.485018253326416,
      "learning_rate": 6.0108e-05,
      "loss": 2.5438,
      "step": 6
    },
    {
      "epoch": 0.00018907165815844205,
      "grad_norm": 4.500787734985352,
      "learning_rate": 7.0126e-05,
      "loss": 3.0282,
      "step": 7
    },
    {
      "epoch": 0.00021608189503821948,
      "grad_norm": 4.6447014808654785,
      "learning_rate": 8.014400000000001e-05,
      "loss": 2.5221,
      "step": 8
    },
    {
      "epoch": 0.0002430921319179969,
      "grad_norm": 4.513657569885254,
      "learning_rate": 9.016200000000001e-05,
      "loss": 2.6626,
      "step": 9
    },
    {
      "epoch": 0.00027010236879777437,
      "grad_norm": 4.137198448181152,
      "learning_rate": 0.00010018,
      "loss": 2.2971,
      "step": 10
    },
    {
      "epoch": 0.00029711260567755177,
      "grad_norm": 4.193837642669678,
      "learning_rate": 9.965273684210526e-05,
      "loss": 2.3521,
      "step": 11
    },
    {
      "epoch": 0.00032412284255732923,
      "grad_norm": 4.4173583984375,
      "learning_rate": 9.912547368421053e-05,
      "loss": 2.3232,
      "step": 12
    },
    {
      "epoch": 0.0003511330794371067,
      "grad_norm": 3.6425013542175293,
      "learning_rate": 9.859821052631579e-05,
      "loss": 2.1981,
      "step": 13
    },
    {
      "epoch": 0.0003781433163168841,
      "grad_norm": 4.589376449584961,
      "learning_rate": 9.807094736842106e-05,
      "loss": 2.7546,
      "step": 14
    },
    {
      "epoch": 0.00040515355319666155,
      "grad_norm": 3.747391939163208,
      "learning_rate": 9.754368421052633e-05,
      "loss": 2.0203,
      "step": 15
    },
    {
      "epoch": 0.00043216379007643895,
      "grad_norm": 4.8549346923828125,
      "learning_rate": 9.701642105263158e-05,
      "loss": 2.7234,
      "step": 16
    },
    {
      "epoch": 0.0004591740269562164,
      "grad_norm": 6.589162826538086,
      "learning_rate": 9.648915789473685e-05,
      "loss": 3.0366,
      "step": 17
    },
    {
      "epoch": 0.0004861842638359938,
      "grad_norm": 4.530044078826904,
      "learning_rate": 9.596189473684211e-05,
      "loss": 2.2864,
      "step": 18
    },
    {
      "epoch": 0.0005131945007157713,
      "grad_norm": 5.117183208465576,
      "learning_rate": 9.543463157894737e-05,
      "loss": 2.5922,
      "step": 19
    },
    {
      "epoch": 0.0005402047375955487,
      "grad_norm": 4.832082748413086,
      "learning_rate": 9.490736842105264e-05,
      "loss": 2.4913,
      "step": 20
    },
    {
      "epoch": 0.0005672149744753262,
      "grad_norm": 3.8494045734405518,
      "learning_rate": 9.43801052631579e-05,
      "loss": 1.9371,
      "step": 21
    },
    {
      "epoch": 0.0005942252113551035,
      "grad_norm": 3.686887264251709,
      "learning_rate": 9.385284210526316e-05,
      "loss": 2.0102,
      "step": 22
    },
    {
      "epoch": 0.000621235448234881,
      "grad_norm": 4.124686241149902,
      "learning_rate": 9.332557894736843e-05,
      "loss": 2.0297,
      "step": 23
    },
    {
      "epoch": 0.0006482456851146585,
      "grad_norm": 3.525970220565796,
      "learning_rate": 9.279831578947369e-05,
      "loss": 1.8261,
      "step": 24
    },
    {
      "epoch": 0.0006752559219944359,
      "grad_norm": 2.8308498859405518,
      "learning_rate": 9.227105263157896e-05,
      "loss": 1.3451,
      "step": 25
    },
    {
      "epoch": 0.0007022661588742134,
      "grad_norm": 4.149746894836426,
      "learning_rate": 9.174378947368421e-05,
      "loss": 2.3011,
      "step": 26
    },
    {
      "epoch": 0.0007292763957539907,
      "grad_norm": 4.0569634437561035,
      "learning_rate": 9.121652631578948e-05,
      "loss": 2.2933,
      "step": 27
    },
    {
      "epoch": 0.0007562866326337682,
      "grad_norm": 3.2820467948913574,
      "learning_rate": 9.068926315789475e-05,
      "loss": 1.6895,
      "step": 28
    },
    {
      "epoch": 0.0007832968695135456,
      "grad_norm": 4.019529819488525,
      "learning_rate": 9.016200000000001e-05,
      "loss": 1.8502,
      "step": 29
    },
    {
      "epoch": 0.0008103071063933231,
      "grad_norm": 3.2498834133148193,
      "learning_rate": 8.963473684210526e-05,
      "loss": 1.585,
      "step": 30
    },
    {
      "epoch": 0.0008373173432731006,
      "grad_norm": 4.720952987670898,
      "learning_rate": 8.910747368421053e-05,
      "loss": 2.1338,
      "step": 31
    },
    {
      "epoch": 0.0008643275801528779,
      "grad_norm": 3.4808425903320312,
      "learning_rate": 8.858021052631579e-05,
      "loss": 1.5556,
      "step": 32
    },
    {
      "epoch": 0.0008913378170326554,
      "grad_norm": 3.387439012527466,
      "learning_rate": 8.805294736842106e-05,
      "loss": 1.7884,
      "step": 33
    },
    {
      "epoch": 0.0009183480539124328,
      "grad_norm": 3.980246067047119,
      "learning_rate": 8.752568421052633e-05,
      "loss": 2.0877,
      "step": 34
    },
    {
      "epoch": 0.0009453582907922103,
      "grad_norm": 3.564471483230591,
      "learning_rate": 8.699842105263159e-05,
      "loss": 1.8872,
      "step": 35
    },
    {
      "epoch": 0.0009723685276719876,
      "grad_norm": 3.5869364738464355,
      "learning_rate": 8.647115789473686e-05,
      "loss": 1.6645,
      "step": 36
    },
    {
      "epoch": 0.0009993787645517652,
      "grad_norm": 4.11824893951416,
      "learning_rate": 8.594389473684211e-05,
      "loss": 1.9814,
      "step": 37
    },
    {
      "epoch": 0.0010263890014315425,
      "grad_norm": 3.116626024246216,
      "learning_rate": 8.541663157894737e-05,
      "loss": 1.4822,
      "step": 38
    },
    {
      "epoch": 0.00105339923831132,
      "grad_norm": 3.8870787620544434,
      "learning_rate": 8.488936842105264e-05,
      "loss": 2.2419,
      "step": 39
    },
    {
      "epoch": 0.0010804094751910975,
      "grad_norm": 3.9773778915405273,
      "learning_rate": 8.43621052631579e-05,
      "loss": 1.9448,
      "step": 40
    },
    {
      "epoch": 0.0011074197120708748,
      "grad_norm": 4.536470413208008,
      "learning_rate": 8.383484210526316e-05,
      "loss": 2.0608,
      "step": 41
    },
    {
      "epoch": 0.0011344299489506524,
      "grad_norm": 3.5629429817199707,
      "learning_rate": 8.330757894736843e-05,
      "loss": 1.7975,
      "step": 42
    },
    {
      "epoch": 0.0011614401858304297,
      "grad_norm": 3.6513662338256836,
      "learning_rate": 8.278031578947369e-05,
      "loss": 1.9062,
      "step": 43
    },
    {
      "epoch": 0.001188450422710207,
      "grad_norm": 3.534175157546997,
      "learning_rate": 8.225305263157896e-05,
      "loss": 1.8835,
      "step": 44
    },
    {
      "epoch": 0.0012154606595899846,
      "grad_norm": 3.87431001663208,
      "learning_rate": 8.172578947368422e-05,
      "loss": 1.7802,
      "step": 45
    },
    {
      "epoch": 0.001242470896469762,
      "grad_norm": 3.6539125442504883,
      "learning_rate": 8.119852631578947e-05,
      "loss": 1.9138,
      "step": 46
    },
    {
      "epoch": 0.0012694811333495396,
      "grad_norm": 3.291635036468506,
      "learning_rate": 8.067126315789474e-05,
      "loss": 1.6632,
      "step": 47
    },
    {
      "epoch": 0.001296491370229317,
      "grad_norm": 4.104748249053955,
      "learning_rate": 8.014400000000001e-05,
      "loss": 1.8235,
      "step": 48
    },
    {
      "epoch": 0.0013235016071090943,
      "grad_norm": 3.2002694606781006,
      "learning_rate": 7.961673684210527e-05,
      "loss": 1.3527,
      "step": 49
    },
    {
      "epoch": 0.0013505118439888718,
      "grad_norm": 3.7805356979370117,
      "learning_rate": 7.908947368421054e-05,
      "loss": 1.8133,
      "step": 50
    },
    {
      "epoch": 0.0013505118439888718,
      "eval_loss": 1.8875995874404907,
      "eval_runtime": 323.1313,
      "eval_samples_per_second": 48.244,
      "eval_steps_per_second": 12.063,
      "step": 50
    },
    {
      "epoch": 0.0013775220808686492,
      "grad_norm": 3.0262796878814697,
      "learning_rate": 7.856221052631579e-05,
      "loss": 2.1624,
      "step": 51
    },
    {
      "epoch": 0.0014045323177484267,
      "grad_norm": 2.7900643348693848,
      "learning_rate": 7.803494736842106e-05,
      "loss": 1.9421,
      "step": 52
    },
    {
      "epoch": 0.001431542554628204,
      "grad_norm": 2.8437888622283936,
      "learning_rate": 7.750768421052632e-05,
      "loss": 2.1949,
      "step": 53
    },
    {
      "epoch": 0.0014585527915079814,
      "grad_norm": 2.974017858505249,
      "learning_rate": 7.698042105263157e-05,
      "loss": 2.146,
      "step": 54
    },
    {
      "epoch": 0.001485563028387759,
      "grad_norm": 2.7476348876953125,
      "learning_rate": 7.645315789473686e-05,
      "loss": 2.0717,
      "step": 55
    },
    {
      "epoch": 0.0015125732652675364,
      "grad_norm": 2.9807491302490234,
      "learning_rate": 7.592589473684211e-05,
      "loss": 1.9227,
      "step": 56
    },
    {
      "epoch": 0.001539583502147314,
      "grad_norm": 3.2775368690490723,
      "learning_rate": 7.539863157894737e-05,
      "loss": 2.129,
      "step": 57
    },
    {
      "epoch": 0.0015665937390270913,
      "grad_norm": 3.4354827404022217,
      "learning_rate": 7.487136842105264e-05,
      "loss": 2.3402,
      "step": 58
    },
    {
      "epoch": 0.0015936039759068686,
      "grad_norm": 3.4880383014678955,
      "learning_rate": 7.43441052631579e-05,
      "loss": 2.5008,
      "step": 59
    },
    {
      "epoch": 0.0016206142127866462,
      "grad_norm": 3.056316375732422,
      "learning_rate": 7.381684210526315e-05,
      "loss": 1.8598,
      "step": 60
    },
    {
      "epoch": 0.0016476244496664235,
      "grad_norm": 4.000846862792969,
      "learning_rate": 7.328957894736844e-05,
      "loss": 2.1936,
      "step": 61
    },
    {
      "epoch": 0.0016746346865462011,
      "grad_norm": 3.282672643661499,
      "learning_rate": 7.276231578947369e-05,
      "loss": 2.2763,
      "step": 62
    },
    {
      "epoch": 0.0017016449234259785,
      "grad_norm": 3.3345065116882324,
      "learning_rate": 7.223505263157895e-05,
      "loss": 2.0944,
      "step": 63
    },
    {
      "epoch": 0.0017286551603057558,
      "grad_norm": 3.6332178115844727,
      "learning_rate": 7.170778947368422e-05,
      "loss": 1.998,
      "step": 64
    },
    {
      "epoch": 0.0017556653971855334,
      "grad_norm": 3.5125057697296143,
      "learning_rate": 7.118052631578947e-05,
      "loss": 2.0625,
      "step": 65
    },
    {
      "epoch": 0.0017826756340653107,
      "grad_norm": 3.124108076095581,
      "learning_rate": 7.065326315789474e-05,
      "loss": 1.7761,
      "step": 66
    },
    {
      "epoch": 0.001809685870945088,
      "grad_norm": 4.27671480178833,
      "learning_rate": 7.0126e-05,
      "loss": 2.4772,
      "step": 67
    },
    {
      "epoch": 0.0018366961078248656,
      "grad_norm": 3.6868836879730225,
      "learning_rate": 6.959873684210527e-05,
      "loss": 2.1861,
      "step": 68
    },
    {
      "epoch": 0.001863706344704643,
      "grad_norm": 3.100480556488037,
      "learning_rate": 6.907147368421054e-05,
      "loss": 1.8642,
      "step": 69
    },
    {
      "epoch": 0.0018907165815844206,
      "grad_norm": 3.209296226501465,
      "learning_rate": 6.85442105263158e-05,
      "loss": 1.7952,
      "step": 70
    },
    {
      "epoch": 0.001917726818464198,
      "grad_norm": 2.694065570831299,
      "learning_rate": 6.801694736842105e-05,
      "loss": 1.6004,
      "step": 71
    },
    {
      "epoch": 0.0019447370553439753,
      "grad_norm": 3.056002616882324,
      "learning_rate": 6.748968421052632e-05,
      "loss": 1.9669,
      "step": 72
    },
    {
      "epoch": 0.001971747292223753,
      "grad_norm": 3.0243594646453857,
      "learning_rate": 6.696242105263158e-05,
      "loss": 1.6867,
      "step": 73
    },
    {
      "epoch": 0.0019987575291035304,
      "grad_norm": 2.582789897918701,
      "learning_rate": 6.643515789473685e-05,
      "loss": 1.485,
      "step": 74
    },
    {
      "epoch": 0.0020257677659833075,
      "grad_norm": 3.274164915084839,
      "learning_rate": 6.590789473684212e-05,
      "loss": 1.8415,
      "step": 75
    },
    {
      "epoch": 0.002052778002863085,
      "grad_norm": 2.930992603302002,
      "learning_rate": 6.538063157894737e-05,
      "loss": 1.5259,
      "step": 76
    },
    {
      "epoch": 0.0020797882397428627,
      "grad_norm": 2.674891710281372,
      "learning_rate": 6.485336842105264e-05,
      "loss": 1.5149,
      "step": 77
    },
    {
      "epoch": 0.00210679847662264,
      "grad_norm": 3.3651278018951416,
      "learning_rate": 6.43261052631579e-05,
      "loss": 1.6893,
      "step": 78
    },
    {
      "epoch": 0.0021338087135024174,
      "grad_norm": 3.8227052688598633,
      "learning_rate": 6.379884210526315e-05,
      "loss": 1.8186,
      "step": 79
    },
    {
      "epoch": 0.002160818950382195,
      "grad_norm": 2.7289936542510986,
      "learning_rate": 6.327157894736842e-05,
      "loss": 1.464,
      "step": 80
    },
    {
      "epoch": 0.0021878291872619725,
      "grad_norm": 4.077561378479004,
      "learning_rate": 6.274431578947368e-05,
      "loss": 1.6729,
      "step": 81
    },
    {
      "epoch": 0.0022148394241417496,
      "grad_norm": 3.349513053894043,
      "learning_rate": 6.221705263157895e-05,
      "loss": 1.8011,
      "step": 82
    },
    {
      "epoch": 0.002241849661021527,
      "grad_norm": 3.1440014839172363,
      "learning_rate": 6.168978947368422e-05,
      "loss": 1.6098,
      "step": 83
    },
    {
      "epoch": 0.0022688598979013048,
      "grad_norm": 3.1661434173583984,
      "learning_rate": 6.116252631578948e-05,
      "loss": 1.805,
      "step": 84
    },
    {
      "epoch": 0.002295870134781082,
      "grad_norm": 3.5450026988983154,
      "learning_rate": 6.063526315789474e-05,
      "loss": 1.7791,
      "step": 85
    },
    {
      "epoch": 0.0023228803716608595,
      "grad_norm": 3.2813570499420166,
      "learning_rate": 6.0108e-05,
      "loss": 1.8894,
      "step": 86
    },
    {
      "epoch": 0.002349890608540637,
      "grad_norm": 2.9585013389587402,
      "learning_rate": 5.9580736842105264e-05,
      "loss": 1.547,
      "step": 87
    },
    {
      "epoch": 0.002376900845420414,
      "grad_norm": 3.132396697998047,
      "learning_rate": 5.905347368421053e-05,
      "loss": 1.5077,
      "step": 88
    },
    {
      "epoch": 0.0024039110823001917,
      "grad_norm": 3.426370859146118,
      "learning_rate": 5.85262105263158e-05,
      "loss": 1.6307,
      "step": 89
    },
    {
      "epoch": 0.0024309213191799693,
      "grad_norm": 3.346588611602783,
      "learning_rate": 5.799894736842106e-05,
      "loss": 1.8479,
      "step": 90
    },
    {
      "epoch": 0.0024579315560597464,
      "grad_norm": 3.3509597778320312,
      "learning_rate": 5.747168421052632e-05,
      "loss": 1.7515,
      "step": 91
    },
    {
      "epoch": 0.002484941792939524,
      "grad_norm": 3.85239839553833,
      "learning_rate": 5.694442105263158e-05,
      "loss": 2.0091,
      "step": 92
    },
    {
      "epoch": 0.0025119520298193016,
      "grad_norm": 3.2585794925689697,
      "learning_rate": 5.641715789473684e-05,
      "loss": 1.3418,
      "step": 93
    },
    {
      "epoch": 0.002538962266699079,
      "grad_norm": 3.9172606468200684,
      "learning_rate": 5.5889894736842104e-05,
      "loss": 1.708,
      "step": 94
    },
    {
      "epoch": 0.0025659725035788563,
      "grad_norm": 3.717564821243286,
      "learning_rate": 5.5362631578947374e-05,
      "loss": 2.1761,
      "step": 95
    },
    {
      "epoch": 0.002592982740458634,
      "grad_norm": 3.911463499069214,
      "learning_rate": 5.483536842105264e-05,
      "loss": 1.8531,
      "step": 96
    },
    {
      "epoch": 0.0026199929773384114,
      "grad_norm": 3.121046304702759,
      "learning_rate": 5.43081052631579e-05,
      "loss": 1.2697,
      "step": 97
    },
    {
      "epoch": 0.0026470032142181885,
      "grad_norm": 3.0859768390655518,
      "learning_rate": 5.378084210526316e-05,
      "loss": 1.4625,
      "step": 98
    },
    {
      "epoch": 0.002674013451097966,
      "grad_norm": 5.068177223205566,
      "learning_rate": 5.3253578947368426e-05,
      "loss": 1.7805,
      "step": 99
    },
    {
      "epoch": 0.0027010236879777437,
      "grad_norm": 4.796220302581787,
      "learning_rate": 5.272631578947368e-05,
      "loss": 2.0215,
      "step": 100
    },
    {
      "epoch": 0.0027010236879777437,
      "eval_loss": 1.84506356716156,
      "eval_runtime": 324.3562,
      "eval_samples_per_second": 48.061,
      "eval_steps_per_second": 12.018,
      "step": 100
    },
    {
      "epoch": 0.002728033924857521,
      "grad_norm": 2.6804707050323486,
      "learning_rate": 5.2199052631578945e-05,
      "loss": 1.7513,
      "step": 101
    },
    {
      "epoch": 0.0027550441617372984,
      "grad_norm": 2.743324041366577,
      "learning_rate": 5.167178947368422e-05,
      "loss": 1.5835,
      "step": 102
    },
    {
      "epoch": 0.002782054398617076,
      "grad_norm": 2.639810800552368,
      "learning_rate": 5.114452631578948e-05,
      "loss": 1.7481,
      "step": 103
    },
    {
      "epoch": 0.0028090646354968535,
      "grad_norm": 2.4762966632843018,
      "learning_rate": 5.061726315789474e-05,
      "loss": 1.7123,
      "step": 104
    },
    {
      "epoch": 0.0028360748723766306,
      "grad_norm": 3.050942897796631,
      "learning_rate": 5.009e-05,
      "loss": 2.2438,
      "step": 105
    },
    {
      "epoch": 0.002863085109256408,
      "grad_norm": 2.996593952178955,
      "learning_rate": 4.9562736842105266e-05,
      "loss": 2.2312,
      "step": 106
    },
    {
      "epoch": 0.0028900953461361858,
      "grad_norm": 3.17206072807312,
      "learning_rate": 4.903547368421053e-05,
      "loss": 2.3683,
      "step": 107
    },
    {
      "epoch": 0.002917105583015963,
      "grad_norm": 3.2420525550842285,
      "learning_rate": 4.850821052631579e-05,
      "loss": 2.261,
      "step": 108
    },
    {
      "epoch": 0.0029441158198957405,
      "grad_norm": 3.376255989074707,
      "learning_rate": 4.7980947368421055e-05,
      "loss": 2.0114,
      "step": 109
    },
    {
      "epoch": 0.002971126056775518,
      "grad_norm": 3.353220224380493,
      "learning_rate": 4.745368421052632e-05,
      "loss": 2.208,
      "step": 110
    },
    {
      "epoch": 0.002998136293655295,
      "grad_norm": 3.3891849517822266,
      "learning_rate": 4.692642105263158e-05,
      "loss": 2.4548,
      "step": 111
    },
    {
      "epoch": 0.0030251465305350727,
      "grad_norm": 3.2557313442230225,
      "learning_rate": 4.6399157894736844e-05,
      "loss": 2.1374,
      "step": 112
    },
    {
      "epoch": 0.0030521567674148503,
      "grad_norm": 3.2548961639404297,
      "learning_rate": 4.5871894736842107e-05,
      "loss": 2.4684,
      "step": 113
    },
    {
      "epoch": 0.003079167004294628,
      "grad_norm": 3.8495306968688965,
      "learning_rate": 4.5344631578947376e-05,
      "loss": 2.2422,
      "step": 114
    },
    {
      "epoch": 0.003106177241174405,
      "grad_norm": 4.851099014282227,
      "learning_rate": 4.481736842105263e-05,
      "loss": 2.7144,
      "step": 115
    },
    {
      "epoch": 0.0031331874780541826,
      "grad_norm": 3.6053466796875,
      "learning_rate": 4.4290105263157895e-05,
      "loss": 2.0815,
      "step": 116
    },
    {
      "epoch": 0.00316019771493396,
      "grad_norm": 3.7665255069732666,
      "learning_rate": 4.3762842105263165e-05,
      "loss": 2.2639,
      "step": 117
    },
    {
      "epoch": 0.0031872079518137373,
      "grad_norm": 2.965463638305664,
      "learning_rate": 4.323557894736843e-05,
      "loss": 1.6309,
      "step": 118
    },
    {
      "epoch": 0.003214218188693515,
      "grad_norm": 2.783212661743164,
      "learning_rate": 4.2708315789473684e-05,
      "loss": 1.783,
      "step": 119
    },
    {
      "epoch": 0.0032412284255732924,
      "grad_norm": 2.667881965637207,
      "learning_rate": 4.218105263157895e-05,
      "loss": 1.5607,
      "step": 120
    },
    {
      "epoch": 0.0032682386624530695,
      "grad_norm": 2.851439952850342,
      "learning_rate": 4.1653789473684217e-05,
      "loss": 1.8242,
      "step": 121
    },
    {
      "epoch": 0.003295248899332847,
      "grad_norm": 2.810152530670166,
      "learning_rate": 4.112652631578948e-05,
      "loss": 1.8485,
      "step": 122
    },
    {
      "epoch": 0.0033222591362126247,
      "grad_norm": 2.722660779953003,
      "learning_rate": 4.0599263157894736e-05,
      "loss": 1.7689,
      "step": 123
    },
    {
      "epoch": 0.0033492693730924022,
      "grad_norm": 2.6506547927856445,
      "learning_rate": 4.0072000000000005e-05,
      "loss": 1.5785,
      "step": 124
    },
    {
      "epoch": 0.0033762796099721794,
      "grad_norm": 2.728020668029785,
      "learning_rate": 3.954473684210527e-05,
      "loss": 1.8581,
      "step": 125
    },
    {
      "epoch": 0.003403289846851957,
      "grad_norm": 2.982895851135254,
      "learning_rate": 3.901747368421053e-05,
      "loss": 2.2773,
      "step": 126
    },
    {
      "epoch": 0.0034303000837317345,
      "grad_norm": 2.847287178039551,
      "learning_rate": 3.849021052631579e-05,
      "loss": 1.5742,
      "step": 127
    },
    {
      "epoch": 0.0034573103206115116,
      "grad_norm": 2.692082643508911,
      "learning_rate": 3.796294736842106e-05,
      "loss": 1.6871,
      "step": 128
    },
    {
      "epoch": 0.003484320557491289,
      "grad_norm": 3.2670037746429443,
      "learning_rate": 3.743568421052632e-05,
      "loss": 1.8359,
      "step": 129
    },
    {
      "epoch": 0.0035113307943710668,
      "grad_norm": 2.5639259815216064,
      "learning_rate": 3.6908421052631576e-05,
      "loss": 1.4626,
      "step": 130
    },
    {
      "epoch": 0.003538341031250844,
      "grad_norm": 3.087303400039673,
      "learning_rate": 3.6381157894736846e-05,
      "loss": 1.8146,
      "step": 131
    },
    {
      "epoch": 0.0035653512681306215,
      "grad_norm": 3.2488865852355957,
      "learning_rate": 3.585389473684211e-05,
      "loss": 1.8134,
      "step": 132
    },
    {
      "epoch": 0.003592361505010399,
      "grad_norm": 3.2120273113250732,
      "learning_rate": 3.532663157894737e-05,
      "loss": 1.484,
      "step": 133
    },
    {
      "epoch": 0.003619371741890176,
      "grad_norm": 2.597536087036133,
      "learning_rate": 3.4799368421052634e-05,
      "loss": 1.4691,
      "step": 134
    },
    {
      "epoch": 0.0036463819787699537,
      "grad_norm": 3.0587146282196045,
      "learning_rate": 3.42721052631579e-05,
      "loss": 1.4249,
      "step": 135
    },
    {
      "epoch": 0.0036733922156497313,
      "grad_norm": 3.46545672416687,
      "learning_rate": 3.374484210526316e-05,
      "loss": 1.5226,
      "step": 136
    },
    {
      "epoch": 0.003700402452529509,
      "grad_norm": 2.8155453205108643,
      "learning_rate": 3.321757894736842e-05,
      "loss": 1.7263,
      "step": 137
    },
    {
      "epoch": 0.003727412689409286,
      "grad_norm": 3.027614116668701,
      "learning_rate": 3.2690315789473686e-05,
      "loss": 1.8355,
      "step": 138
    },
    {
      "epoch": 0.0037544229262890636,
      "grad_norm": 3.7238800525665283,
      "learning_rate": 3.216305263157895e-05,
      "loss": 1.4877,
      "step": 139
    },
    {
      "epoch": 0.003781433163168841,
      "grad_norm": 3.3556723594665527,
      "learning_rate": 3.163578947368421e-05,
      "loss": 1.8014,
      "step": 140
    },
    {
      "epoch": 0.0038084434000486183,
      "grad_norm": 2.854469060897827,
      "learning_rate": 3.1108526315789475e-05,
      "loss": 1.5076,
      "step": 141
    },
    {
      "epoch": 0.003835453636928396,
      "grad_norm": 2.8921852111816406,
      "learning_rate": 3.058126315789474e-05,
      "loss": 1.7795,
      "step": 142
    },
    {
      "epoch": 0.0038624638738081734,
      "grad_norm": 3.3138952255249023,
      "learning_rate": 3.0054e-05,
      "loss": 1.6566,
      "step": 143
    },
    {
      "epoch": 0.0038894741106879505,
      "grad_norm": 3.637906551361084,
      "learning_rate": 2.9526736842105263e-05,
      "loss": 1.8738,
      "step": 144
    },
    {
      "epoch": 0.0039164843475677285,
      "grad_norm": 3.2864222526550293,
      "learning_rate": 2.899947368421053e-05,
      "loss": 1.649,
      "step": 145
    },
    {
      "epoch": 0.003943494584447506,
      "grad_norm": 2.840507984161377,
      "learning_rate": 2.847221052631579e-05,
      "loss": 1.4216,
      "step": 146
    },
    {
      "epoch": 0.003970504821327283,
      "grad_norm": 2.9531781673431396,
      "learning_rate": 2.7944947368421052e-05,
      "loss": 1.5841,
      "step": 147
    },
    {
      "epoch": 0.003997515058207061,
      "grad_norm": 3.016000509262085,
      "learning_rate": 2.741768421052632e-05,
      "loss": 1.3651,
      "step": 148
    },
    {
      "epoch": 0.004024525295086838,
      "grad_norm": 3.315701961517334,
      "learning_rate": 2.689042105263158e-05,
      "loss": 1.4405,
      "step": 149
    },
    {
      "epoch": 0.004051535531966615,
      "grad_norm": 3.887491226196289,
      "learning_rate": 2.636315789473684e-05,
      "loss": 1.8023,
      "step": 150
    },
    {
      "epoch": 0.004051535531966615,
      "eval_loss": 1.8144692182540894,
      "eval_runtime": 321.8975,
      "eval_samples_per_second": 48.428,
      "eval_steps_per_second": 12.109,
      "step": 150
    },
    {
      "epoch": 0.004078545768846393,
      "grad_norm": 2.3892409801483154,
      "learning_rate": 2.583589473684211e-05,
      "loss": 1.8418,
      "step": 151
    },
    {
      "epoch": 0.00410555600572617,
      "grad_norm": 2.7030460834503174,
      "learning_rate": 2.530863157894737e-05,
      "loss": 1.9883,
      "step": 152
    },
    {
      "epoch": 0.004132566242605947,
      "grad_norm": 2.809255838394165,
      "learning_rate": 2.4781368421052633e-05,
      "loss": 1.994,
      "step": 153
    },
    {
      "epoch": 0.004159576479485725,
      "grad_norm": 2.7749948501586914,
      "learning_rate": 2.4254105263157896e-05,
      "loss": 1.7347,
      "step": 154
    },
    {
      "epoch": 0.0041865867163655025,
      "grad_norm": 2.7523014545440674,
      "learning_rate": 2.372684210526316e-05,
      "loss": 1.4725,
      "step": 155
    },
    {
      "epoch": 0.00421359695324528,
      "grad_norm": 3.1755049228668213,
      "learning_rate": 2.3199578947368422e-05,
      "loss": 1.9171,
      "step": 156
    },
    {
      "epoch": 0.004240607190125058,
      "grad_norm": 3.1159286499023438,
      "learning_rate": 2.2672315789473688e-05,
      "loss": 2.0677,
      "step": 157
    },
    {
      "epoch": 0.004267617427004835,
      "grad_norm": 2.927952527999878,
      "learning_rate": 2.2145052631578948e-05,
      "loss": 1.6294,
      "step": 158
    },
    {
      "epoch": 0.004294627663884612,
      "grad_norm": 2.920624256134033,
      "learning_rate": 2.1617789473684214e-05,
      "loss": 1.9123,
      "step": 159
    },
    {
      "epoch": 0.00432163790076439,
      "grad_norm": 3.3584253787994385,
      "learning_rate": 2.1090526315789473e-05,
      "loss": 2.0167,
      "step": 160
    },
    {
      "epoch": 0.004348648137644167,
      "grad_norm": 4.036590099334717,
      "learning_rate": 2.056326315789474e-05,
      "loss": 2.3696,
      "step": 161
    },
    {
      "epoch": 0.004375658374523945,
      "grad_norm": 3.8149144649505615,
      "learning_rate": 2.0036000000000003e-05,
      "loss": 2.3095,
      "step": 162
    },
    {
      "epoch": 0.004402668611403722,
      "grad_norm": 3.711400032043457,
      "learning_rate": 1.9508736842105266e-05,
      "loss": 1.9094,
      "step": 163
    },
    {
      "epoch": 0.004429678848283499,
      "grad_norm": 3.479764699935913,
      "learning_rate": 1.898147368421053e-05,
      "loss": 2.1776,
      "step": 164
    },
    {
      "epoch": 0.004456689085163277,
      "grad_norm": 4.012089729309082,
      "learning_rate": 1.8454210526315788e-05,
      "loss": 1.9562,
      "step": 165
    },
    {
      "epoch": 0.004483699322043054,
      "grad_norm": 5.2212815284729,
      "learning_rate": 1.7926947368421054e-05,
      "loss": 2.3301,
      "step": 166
    },
    {
      "epoch": 0.0045107095589228315,
      "grad_norm": 4.928096294403076,
      "learning_rate": 1.7399684210526317e-05,
      "loss": 2.0416,
      "step": 167
    },
    {
      "epoch": 0.0045377197958026095,
      "grad_norm": 3.51525616645813,
      "learning_rate": 1.687242105263158e-05,
      "loss": 2.1433,
      "step": 168
    },
    {
      "epoch": 0.004564730032682387,
      "grad_norm": 2.5053954124450684,
      "learning_rate": 1.6345157894736843e-05,
      "loss": 1.2284,
      "step": 169
    },
    {
      "epoch": 0.004591740269562164,
      "grad_norm": 3.1079280376434326,
      "learning_rate": 1.5817894736842106e-05,
      "loss": 1.8039,
      "step": 170
    },
    {
      "epoch": 0.004618750506441942,
      "grad_norm": 2.9641072750091553,
      "learning_rate": 1.529063157894737e-05,
      "loss": 1.7875,
      "step": 171
    },
    {
      "epoch": 0.004645760743321719,
      "grad_norm": 2.7184035778045654,
      "learning_rate": 1.4763368421052632e-05,
      "loss": 1.7067,
      "step": 172
    },
    {
      "epoch": 0.004672770980201496,
      "grad_norm": 2.5093767642974854,
      "learning_rate": 1.4236105263157895e-05,
      "loss": 1.5895,
      "step": 173
    },
    {
      "epoch": 0.004699781217081274,
      "grad_norm": 3.6973133087158203,
      "learning_rate": 1.370884210526316e-05,
      "loss": 1.8151,
      "step": 174
    },
    {
      "epoch": 0.004726791453961051,
      "grad_norm": 2.5547432899475098,
      "learning_rate": 1.318157894736842e-05,
      "loss": 1.5454,
      "step": 175
    },
    {
      "epoch": 0.004753801690840828,
      "grad_norm": 3.5522589683532715,
      "learning_rate": 1.2654315789473685e-05,
      "loss": 1.8471,
      "step": 176
    },
    {
      "epoch": 0.004780811927720606,
      "grad_norm": 2.897728681564331,
      "learning_rate": 1.2127052631578948e-05,
      "loss": 1.7892,
      "step": 177
    },
    {
      "epoch": 0.0048078221646003835,
      "grad_norm": 3.0070700645446777,
      "learning_rate": 1.1599789473684211e-05,
      "loss": 1.7458,
      "step": 178
    },
    {
      "epoch": 0.004834832401480161,
      "grad_norm": 3.040104389190674,
      "learning_rate": 1.1072526315789474e-05,
      "loss": 1.6547,
      "step": 179
    },
    {
      "epoch": 0.004861842638359939,
      "grad_norm": 2.3258042335510254,
      "learning_rate": 1.0545263157894737e-05,
      "loss": 1.2709,
      "step": 180
    },
    {
      "epoch": 0.004888852875239716,
      "grad_norm": 2.8469483852386475,
      "learning_rate": 1.0018000000000001e-05,
      "loss": 1.6427,
      "step": 181
    },
    {
      "epoch": 0.004915863112119493,
      "grad_norm": 3.239359140396118,
      "learning_rate": 9.490736842105264e-06,
      "loss": 1.6716,
      "step": 182
    },
    {
      "epoch": 0.004942873348999271,
      "grad_norm": 2.6865952014923096,
      "learning_rate": 8.963473684210527e-06,
      "loss": 1.693,
      "step": 183
    },
    {
      "epoch": 0.004969883585879048,
      "grad_norm": 3.1464898586273193,
      "learning_rate": 8.43621052631579e-06,
      "loss": 1.9052,
      "step": 184
    },
    {
      "epoch": 0.004996893822758826,
      "grad_norm": 2.902247905731201,
      "learning_rate": 7.908947368421053e-06,
      "loss": 1.8767,
      "step": 185
    },
    {
      "epoch": 0.005023904059638603,
      "grad_norm": 2.765345573425293,
      "learning_rate": 7.381684210526316e-06,
      "loss": 1.4926,
      "step": 186
    },
    {
      "epoch": 0.00505091429651838,
      "grad_norm": 3.451422691345215,
      "learning_rate": 6.85442105263158e-06,
      "loss": 1.9693,
      "step": 187
    },
    {
      "epoch": 0.005077924533398158,
      "grad_norm": 2.784844398498535,
      "learning_rate": 6.3271578947368425e-06,
      "loss": 1.4707,
      "step": 188
    },
    {
      "epoch": 0.005104934770277935,
      "grad_norm": 3.7692813873291016,
      "learning_rate": 5.7998947368421054e-06,
      "loss": 2.1994,
      "step": 189
    },
    {
      "epoch": 0.0051319450071577125,
      "grad_norm": 3.0001394748687744,
      "learning_rate": 5.272631578947368e-06,
      "loss": 1.5634,
      "step": 190
    },
    {
      "epoch": 0.0051589552440374905,
      "grad_norm": 2.9124486446380615,
      "learning_rate": 4.745368421052632e-06,
      "loss": 1.5113,
      "step": 191
    },
    {
      "epoch": 0.005185965480917268,
      "grad_norm": 2.946713924407959,
      "learning_rate": 4.218105263157895e-06,
      "loss": 1.6126,
      "step": 192
    },
    {
      "epoch": 0.005212975717797045,
      "grad_norm": 3.2477729320526123,
      "learning_rate": 3.690842105263158e-06,
      "loss": 1.362,
      "step": 193
    },
    {
      "epoch": 0.005239985954676823,
      "grad_norm": 3.283214807510376,
      "learning_rate": 3.1635789473684213e-06,
      "loss": 1.6263,
      "step": 194
    },
    {
      "epoch": 0.0052669961915566,
      "grad_norm": 2.9746735095977783,
      "learning_rate": 2.636315789473684e-06,
      "loss": 1.5695,
      "step": 195
    },
    {
      "epoch": 0.005294006428436377,
      "grad_norm": 3.2501778602600098,
      "learning_rate": 2.1090526315789475e-06,
      "loss": 1.733,
      "step": 196
    },
    {
      "epoch": 0.005321016665316155,
      "grad_norm": 2.839373826980591,
      "learning_rate": 1.5817894736842106e-06,
      "loss": 1.6794,
      "step": 197
    },
    {
      "epoch": 0.005348026902195932,
      "grad_norm": 3.3260462284088135,
      "learning_rate": 1.0545263157894738e-06,
      "loss": 1.5539,
      "step": 198
    },
    {
      "epoch": 0.005375037139075709,
      "grad_norm": 2.837555408477783,
      "learning_rate": 5.272631578947369e-07,
      "loss": 1.4777,
      "step": 199
    },
    {
      "epoch": 0.005402047375955487,
      "grad_norm": 2.7860605716705322,
      "learning_rate": 0.0,
      "loss": 1.6072,
      "step": 200
    },
    {
      "epoch": 0.005402047375955487,
      "eval_loss": 1.8002580404281616,
      "eval_runtime": 323.0678,
      "eval_samples_per_second": 48.253,
      "eval_steps_per_second": 12.066,
      "step": 200
    }
  ],
  "logging_steps": 1,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 5332652877938688.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}