|
{
  "best_metric": 0.8324182680309459,
  "best_model_checkpoint": "results_retain/facebook/wav2vec2-base/42/checkpoint-30000",
  "epoch": 69.20415224913495,
  "eval_steps": 500,
  "global_step": 30000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.1534025374855825,
      "grad_norm": 4.369956970214844,
      "learning_rate": 8.333333333333333e-05,
      "loss": 3.9309,
      "step": 500
    },
    {
      "epoch": 1.1534025374855825,
      "eval_accuracy": 0.15310706264037935,
      "eval_f1_macro": 0.019883818836519816,
      "eval_loss": 3.409226894378662,
      "eval_runtime": 35.4301,
      "eval_samples_per_second": 226.192,
      "eval_steps_per_second": 7.084,
      "step": 500
    },
    {
      "epoch": 2.306805074971165,
      "grad_norm": 14.16882610321045,
      "learning_rate": 0.00016666666666666666,
      "loss": 2.5622,
      "step": 1000
    },
    {
      "epoch": 2.306805074971165,
      "eval_accuracy": 0.541177938607437,
      "eval_f1_macro": 0.26987730386621905,
      "eval_loss": 1.8982651233673096,
      "eval_runtime": 35.0851,
      "eval_samples_per_second": 228.416,
      "eval_steps_per_second": 7.154,
      "step": 1000
    },
    {
      "epoch": 3.4602076124567476,
      "grad_norm": 9.389366149902344,
      "learning_rate": 0.00025,
      "loss": 1.6435,
      "step": 1500
    },
    {
      "epoch": 3.4602076124567476,
      "eval_accuracy": 0.6169203893186923,
      "eval_f1_macro": 0.37606688616805145,
      "eval_loss": 1.6092511415481567,
      "eval_runtime": 35.1816,
      "eval_samples_per_second": 227.79,
      "eval_steps_per_second": 7.134,
      "step": 1500
    },
    {
      "epoch": 4.61361014994233,
      "grad_norm": 11.559647560119629,
      "learning_rate": 0.0003333333333333333,
      "loss": 1.4186,
      "step": 2000
    },
    {
      "epoch": 4.61361014994233,
      "eval_accuracy": 0.6561018218118293,
      "eval_f1_macro": 0.4408940890295375,
      "eval_loss": 1.540195345878601,
      "eval_runtime": 35.3339,
      "eval_samples_per_second": 226.808,
      "eval_steps_per_second": 7.104,
      "step": 2000
    },
    {
      "epoch": 5.767012687427912,
      "grad_norm": 11.722562789916992,
      "learning_rate": 0.0004166666666666667,
      "loss": 1.3754,
      "step": 2500
    },
    {
      "epoch": 5.767012687427912,
      "eval_accuracy": 0.6512353381582231,
      "eval_f1_macro": 0.4529040361609232,
      "eval_loss": 1.5076924562454224,
      "eval_runtime": 35.3118,
      "eval_samples_per_second": 226.95,
      "eval_steps_per_second": 7.108,
      "step": 2500
    },
    {
      "epoch": 6.920415224913495,
      "grad_norm": 10.16735553741455,
      "learning_rate": 0.0005,
      "loss": 1.3999,
      "step": 3000
    },
    {
      "epoch": 6.920415224913495,
      "eval_accuracy": 0.6501123034689293,
      "eval_f1_macro": 0.4559819632486863,
      "eval_loss": 1.5159275531768799,
      "eval_runtime": 35.4302,
      "eval_samples_per_second": 226.191,
      "eval_steps_per_second": 7.084,
      "step": 3000
    },
    {
      "epoch": 8.073817762399077,
      "grad_norm": 8.510236740112305,
      "learning_rate": 0.0004907407407407408,
      "loss": 1.3843,
      "step": 3500
    },
    {
      "epoch": 8.073817762399077,
      "eval_accuracy": 0.6425006239081608,
      "eval_f1_macro": 0.45380481877858314,
      "eval_loss": 1.531354308128357,
      "eval_runtime": 35.0783,
      "eval_samples_per_second": 228.461,
      "eval_steps_per_second": 7.155,
      "step": 3500
    },
    {
      "epoch": 9.22722029988466,
      "grad_norm": 6.111170768737793,
      "learning_rate": 0.00048148148148148144,
      "loss": 1.2362,
      "step": 4000
    },
    {
      "epoch": 9.22722029988466,
      "eval_accuracy": 0.6679560768654854,
      "eval_f1_macro": 0.49259444116250145,
      "eval_loss": 1.4102381467819214,
      "eval_runtime": 35.1989,
      "eval_samples_per_second": 227.678,
      "eval_steps_per_second": 7.131,
      "step": 4000
    },
    {
      "epoch": 10.380622837370241,
      "grad_norm": 7.980649948120117,
      "learning_rate": 0.00047222222222222224,
      "loss": 1.111,
      "step": 4500
    },
    {
      "epoch": 10.380622837370241,
      "eval_accuracy": 0.6789368604941353,
      "eval_f1_macro": 0.49662640502149236,
      "eval_loss": 1.385581612586975,
      "eval_runtime": 35.3766,
      "eval_samples_per_second": 226.534,
      "eval_steps_per_second": 7.095,
      "step": 4500
    },
    {
      "epoch": 11.534025374855824,
      "grad_norm": 7.416294574737549,
      "learning_rate": 0.000462962962962963,
      "loss": 1.018,
      "step": 5000
    },
    {
      "epoch": 11.534025374855824,
      "eval_accuracy": 0.6690791115547792,
      "eval_f1_macro": 0.4911372457550657,
      "eval_loss": 1.451659917831421,
      "eval_runtime": 35.349,
      "eval_samples_per_second": 226.711,
      "eval_steps_per_second": 7.101,
      "step": 5000
    },
    {
      "epoch": 12.687427912341407,
      "grad_norm": 6.453460216522217,
      "learning_rate": 0.0004537037037037037,
      "loss": 0.9373,
      "step": 5500
    },
    {
      "epoch": 12.687427912341407,
      "eval_accuracy": 0.697903668579985,
      "eval_f1_macro": 0.5213519190207141,
      "eval_loss": 1.3074634075164795,
      "eval_runtime": 35.3636,
      "eval_samples_per_second": 226.617,
      "eval_steps_per_second": 7.098,
      "step": 5500
    },
    {
      "epoch": 13.84083044982699,
      "grad_norm": 5.483336925506592,
      "learning_rate": 0.0004444444444444444,
      "loss": 0.8447,
      "step": 6000
    },
    {
      "epoch": 13.84083044982699,
      "eval_accuracy": 0.7037684052907413,
      "eval_f1_macro": 0.5403759594538143,
      "eval_loss": 1.2658469676971436,
      "eval_runtime": 35.3101,
      "eval_samples_per_second": 226.961,
      "eval_steps_per_second": 7.108,
      "step": 6000
    },
    {
      "epoch": 14.994232987312571,
      "grad_norm": 8.091373443603516,
      "learning_rate": 0.0004351851851851852,
      "loss": 0.7729,
      "step": 6500
    },
    {
      "epoch": 14.994232987312571,
      "eval_accuracy": 0.7111305215872223,
      "eval_f1_macro": 0.5506675562830741,
      "eval_loss": 1.2573933601379395,
      "eval_runtime": 35.2856,
      "eval_samples_per_second": 227.118,
      "eval_steps_per_second": 7.113,
      "step": 6500
    },
    {
      "epoch": 16.147635524798154,
      "grad_norm": 7.302002906799316,
      "learning_rate": 0.00042592592592592595,
      "loss": 0.6963,
      "step": 7000
    },
    {
      "epoch": 16.147635524798154,
      "eval_accuracy": 0.7262290990766159,
      "eval_f1_macro": 0.5559240421705101,
      "eval_loss": 1.2266888618469238,
      "eval_runtime": 36.5656,
      "eval_samples_per_second": 219.168,
      "eval_steps_per_second": 6.864,
      "step": 7000
    },
    {
      "epoch": 17.301038062283737,
      "grad_norm": 6.176177501678467,
      "learning_rate": 0.0004166666666666667,
      "loss": 0.6499,
      "step": 7500
    },
    {
      "epoch": 17.301038062283737,
      "eval_accuracy": 0.7266034439730472,
      "eval_f1_macro": 0.549456325002494,
      "eval_loss": 1.245578646659851,
      "eval_runtime": 35.3365,
      "eval_samples_per_second": 226.791,
      "eval_steps_per_second": 7.103,
      "step": 7500
    },
    {
      "epoch": 18.45444059976932,
      "grad_norm": 6.318857192993164,
      "learning_rate": 0.0004074074074074074,
      "loss": 0.5983,
      "step": 8000
    },
    {
      "epoch": 18.45444059976932,
      "eval_accuracy": 0.7292238582480659,
      "eval_f1_macro": 0.575232140696358,
      "eval_loss": 1.2900265455245972,
      "eval_runtime": 35.5024,
      "eval_samples_per_second": 225.731,
      "eval_steps_per_second": 7.07,
      "step": 8000
    },
    {
      "epoch": 19.607843137254903,
      "grad_norm": 10.460654258728027,
      "learning_rate": 0.0003981481481481481,
      "loss": 0.5503,
      "step": 8500
    },
    {
      "epoch": 19.607843137254903,
      "eval_accuracy": 0.7196156725729973,
      "eval_f1_macro": 0.5556217816058379,
      "eval_loss": 1.3130799531936646,
      "eval_runtime": 35.1865,
      "eval_samples_per_second": 227.758,
      "eval_steps_per_second": 7.133,
      "step": 8500
    },
    {
      "epoch": 20.761245674740483,
      "grad_norm": 8.248889923095703,
      "learning_rate": 0.0003888888888888889,
      "loss": 0.511,
      "step": 9000
    },
    {
      "epoch": 20.761245674740483,
      "eval_accuracy": 0.7455702520588969,
      "eval_f1_macro": 0.5823341220355842,
      "eval_loss": 1.2670824527740479,
      "eval_runtime": 35.4588,
      "eval_samples_per_second": 226.009,
      "eval_steps_per_second": 7.079,
      "step": 9000
    },
    {
      "epoch": 21.914648212226066,
      "grad_norm": 5.647006988525391,
      "learning_rate": 0.00037962962962962966,
      "loss": 0.4782,
      "step": 9500
    },
    {
      "epoch": 21.914648212226066,
      "eval_accuracy": 0.7422011479910158,
      "eval_f1_macro": 0.5962421465197172,
      "eval_loss": 1.3255208730697632,
      "eval_runtime": 35.2623,
      "eval_samples_per_second": 227.268,
      "eval_steps_per_second": 7.118,
      "step": 9500
    },
    {
      "epoch": 23.06805074971165,
      "grad_norm": 4.740267753601074,
      "learning_rate": 0.00037037037037037035,
      "loss": 0.4381,
      "step": 10000
    },
    {
      "epoch": 23.06805074971165,
      "eval_accuracy": 0.7499376091839282,
      "eval_f1_macro": 0.5833773217449505,
      "eval_loss": 1.1890977621078491,
      "eval_runtime": 35.3036,
      "eval_samples_per_second": 227.002,
      "eval_steps_per_second": 7.11,
      "step": 10000
    },
    {
      "epoch": 24.22145328719723,
      "grad_norm": 6.188416481018066,
      "learning_rate": 0.0003611111111111111,
      "loss": 0.3939,
      "step": 10500
    },
    {
      "epoch": 24.22145328719723,
      "eval_accuracy": 0.7560519091589718,
      "eval_f1_macro": 0.5989834099897938,
      "eval_loss": 1.2394686937332153,
      "eval_runtime": 35.2099,
      "eval_samples_per_second": 227.607,
      "eval_steps_per_second": 7.129,
      "step": 10500
    },
    {
      "epoch": 25.374855824682815,
      "grad_norm": 6.43159294128418,
      "learning_rate": 0.0003518518518518519,
      "loss": 0.3749,
      "step": 11000
    },
    {
      "epoch": 25.374855824682815,
      "eval_accuracy": 0.7571749438482656,
      "eval_f1_macro": 0.5924283240267194,
      "eval_loss": 1.2047547101974487,
      "eval_runtime": 35.0834,
      "eval_samples_per_second": 228.427,
      "eval_steps_per_second": 7.154,
      "step": 11000
    },
    {
      "epoch": 26.528258362168398,
      "grad_norm": 5.618147850036621,
      "learning_rate": 0.00034259259259259263,
      "loss": 0.3433,
      "step": 11500
    },
    {
      "epoch": 26.528258362168398,
      "eval_accuracy": 0.7448215622660345,
      "eval_f1_macro": 0.5914660771680106,
      "eval_loss": 1.2851983308792114,
      "eval_runtime": 34.8189,
      "eval_samples_per_second": 230.162,
      "eval_steps_per_second": 7.209,
      "step": 11500
    },
    {
      "epoch": 27.68166089965398,
      "grad_norm": 5.279151439666748,
      "learning_rate": 0.0003333333333333333,
      "loss": 0.3211,
      "step": 12000
    },
    {
      "epoch": 27.68166089965398,
      "eval_accuracy": 0.7535562765160968,
      "eval_f1_macro": 0.6027492509194426,
      "eval_loss": 1.268044114112854,
      "eval_runtime": 34.8176,
      "eval_samples_per_second": 230.171,
      "eval_steps_per_second": 7.209,
      "step": 12000
    },
    {
      "epoch": 28.83506343713956,
      "grad_norm": 4.73464298248291,
      "learning_rate": 0.00032407407407407406,
      "loss": 0.3006,
      "step": 12500
    },
    {
      "epoch": 28.83506343713956,
      "eval_accuracy": 0.761417519341153,
      "eval_f1_macro": 0.6132345857728708,
      "eval_loss": 1.2127602100372314,
      "eval_runtime": 34.7848,
      "eval_samples_per_second": 230.388,
      "eval_steps_per_second": 7.216,
      "step": 12500
    },
    {
      "epoch": 29.988465974625143,
      "grad_norm": 5.955222129821777,
      "learning_rate": 0.0003148148148148148,
      "loss": 0.2799,
      "step": 13000
    },
    {
      "epoch": 29.988465974625143,
      "eval_accuracy": 0.7632892438233092,
      "eval_f1_macro": 0.6125965461682634,
      "eval_loss": 1.2151060104370117,
      "eval_runtime": 34.8242,
      "eval_samples_per_second": 230.128,
      "eval_steps_per_second": 7.208,
      "step": 13000
    },
    {
      "epoch": 31.141868512110726,
      "grad_norm": 8.98561954498291,
      "learning_rate": 0.0003055555555555556,
      "loss": 0.2657,
      "step": 13500
    },
    {
      "epoch": 31.141868512110726,
      "eval_accuracy": 0.7513102071375094,
      "eval_f1_macro": 0.6058015188813233,
      "eval_loss": 1.3033212423324585,
      "eval_runtime": 34.82,
      "eval_samples_per_second": 230.155,
      "eval_steps_per_second": 7.208,
      "step": 13500
    },
    {
      "epoch": 32.29527104959631,
      "grad_norm": 5.852510929107666,
      "learning_rate": 0.0002962962962962963,
      "loss": 0.2341,
      "step": 14000
    },
    {
      "epoch": 32.29527104959631,
      "eval_accuracy": 0.7716496131769404,
      "eval_f1_macro": 0.6079228678531986,
      "eval_loss": 1.2467527389526367,
      "eval_runtime": 34.7926,
      "eval_samples_per_second": 230.336,
      "eval_steps_per_second": 7.214,
      "step": 14000
    },
    {
      "epoch": 33.44867358708189,
      "grad_norm": 3.7392752170562744,
      "learning_rate": 0.00028703703703703703,
      "loss": 0.2268,
      "step": 14500
    },
    {
      "epoch": 33.44867358708189,
      "eval_accuracy": 0.7672822560519091,
      "eval_f1_macro": 0.6200186960995067,
      "eval_loss": 1.173102855682373,
      "eval_runtime": 34.7778,
      "eval_samples_per_second": 230.434,
      "eval_steps_per_second": 7.217,
      "step": 14500
    },
    {
      "epoch": 34.602076124567475,
      "grad_norm": 3.8082404136657715,
      "learning_rate": 0.0002777777777777778,
      "loss": 0.2138,
      "step": 15000
    },
    {
      "epoch": 34.602076124567475,
      "eval_accuracy": 0.7682805091090591,
      "eval_f1_macro": 0.6089872489834017,
      "eval_loss": 1.2769969701766968,
      "eval_runtime": 34.8228,
      "eval_samples_per_second": 230.137,
      "eval_steps_per_second": 7.208,
      "step": 15000
    },
    {
      "epoch": 35.75547866205306,
      "grad_norm": 4.899130821228027,
      "learning_rate": 0.0002685185185185186,
      "loss": 0.2019,
      "step": 15500
    },
    {
      "epoch": 35.75547866205306,
      "eval_accuracy": 0.7803843274270027,
      "eval_f1_macro": 0.6410356899857027,
      "eval_loss": 1.182688593864441,
      "eval_runtime": 34.7724,
      "eval_samples_per_second": 230.47,
      "eval_steps_per_second": 7.218,
      "step": 15500
    },
    {
      "epoch": 36.90888119953864,
      "grad_norm": 4.297879219055176,
      "learning_rate": 0.00025925925925925926,
      "loss": 0.1832,
      "step": 16000
    },
    {
      "epoch": 36.90888119953864,
      "eval_accuracy": 0.7696531070626403,
      "eval_f1_macro": 0.628787412236927,
      "eval_loss": 1.2591631412506104,
      "eval_runtime": 34.7726,
      "eval_samples_per_second": 230.469,
      "eval_steps_per_second": 7.218,
      "step": 16000
    },
    {
      "epoch": 38.062283737024224,
      "grad_norm": 4.6692214012146,
      "learning_rate": 0.00025,
      "loss": 0.1737,
      "step": 16500
    },
    {
      "epoch": 38.062283737024224,
      "eval_accuracy": 0.7709009233840779,
      "eval_f1_macro": 0.6107952760204466,
      "eval_loss": 1.2620958089828491,
      "eval_runtime": 36.0409,
      "eval_samples_per_second": 222.358,
      "eval_steps_per_second": 6.964,
      "step": 16500
    },
    {
      "epoch": 39.21568627450981,
      "grad_norm": 3.8761215209960938,
      "learning_rate": 0.00024074074074074072,
      "loss": 0.1574,
      "step": 17000
    },
    {
      "epoch": 39.21568627450981,
      "eval_accuracy": 0.7748939356126778,
      "eval_f1_macro": 0.6391500982858909,
      "eval_loss": 1.3058395385742188,
      "eval_runtime": 36.1321,
      "eval_samples_per_second": 221.797,
      "eval_steps_per_second": 6.947,
      "step": 17000
    },
    {
      "epoch": 40.36908881199539,
      "grad_norm": 3.7799384593963623,
      "learning_rate": 0.0002314814814814815,
      "loss": 0.1494,
      "step": 17500
    },
    {
      "epoch": 40.36908881199539,
      "eval_accuracy": 0.7775143498876965,
      "eval_f1_macro": 0.634463633421762,
      "eval_loss": 1.2541757822036743,
      "eval_runtime": 36.0992,
      "eval_samples_per_second": 222.0,
      "eval_steps_per_second": 6.953,
      "step": 17500
    },
    {
      "epoch": 41.522491349480966,
      "grad_norm": 3.6442158222198486,
      "learning_rate": 0.0002222222222222222,
      "loss": 0.1428,
      "step": 18000
    },
    {
      "epoch": 41.522491349480966,
      "eval_accuracy": 0.7815073621162965,
      "eval_f1_macro": 0.6354469092694762,
      "eval_loss": 1.2337605953216553,
      "eval_runtime": 36.0255,
      "eval_samples_per_second": 222.453,
      "eval_steps_per_second": 6.967,
      "step": 18000
    },
    {
      "epoch": 42.67589388696655,
      "grad_norm": 3.6234309673309326,
      "learning_rate": 0.00021296296296296298,
      "loss": 0.1273,
      "step": 18500
    },
    {
      "epoch": 42.67589388696655,
      "eval_accuracy": 0.7836286498627402,
      "eval_f1_macro": 0.6407864496886617,
      "eval_loss": 1.2769566774368286,
      "eval_runtime": 35.6646,
      "eval_samples_per_second": 224.705,
      "eval_steps_per_second": 7.038,
      "step": 18500
    },
    {
      "epoch": 43.82929642445213,
      "grad_norm": 2.8652567863464355,
      "learning_rate": 0.0002037037037037037,
      "loss": 0.1181,
      "step": 19000
    },
    {
      "epoch": 43.82929642445213,
      "eval_accuracy": 0.7831295233341652,
      "eval_f1_macro": 0.6496093521953449,
      "eval_loss": 1.3619425296783447,
      "eval_runtime": 36.0707,
      "eval_samples_per_second": 222.175,
      "eval_steps_per_second": 6.959,
      "step": 19000
    },
    {
      "epoch": 44.982698961937714,
      "grad_norm": 5.1563191413879395,
      "learning_rate": 0.00019444444444444446,
      "loss": 0.1139,
      "step": 19500
    },
    {
      "epoch": 44.982698961937714,
      "eval_accuracy": 0.7891190416770651,
      "eval_f1_macro": 0.639353026374818,
      "eval_loss": 1.231618881225586,
      "eval_runtime": 35.9657,
      "eval_samples_per_second": 222.823,
      "eval_steps_per_second": 6.979,
      "step": 19500
    },
    {
      "epoch": 46.1361014994233,
      "grad_norm": 1.5734127759933472,
      "learning_rate": 0.00018518518518518518,
      "loss": 0.1036,
      "step": 20000
    },
    {
      "epoch": 46.1361014994233,
      "eval_accuracy": 0.7962315947092587,
      "eval_f1_macro": 0.6474614824910541,
      "eval_loss": 1.2083463668823242,
      "eval_runtime": 35.5967,
      "eval_samples_per_second": 225.133,
      "eval_steps_per_second": 7.051,
      "step": 20000
    },
    {
      "epoch": 47.28950403690888,
      "grad_norm": 0.9659342765808105,
      "learning_rate": 0.00017592592592592595,
      "loss": 0.0984,
      "step": 20500
    },
    {
      "epoch": 47.28950403690888,
      "eval_accuracy": 0.7961068130771151,
      "eval_f1_macro": 0.6731347859037599,
      "eval_loss": 1.2197343111038208,
      "eval_runtime": 40.3877,
      "eval_samples_per_second": 198.427,
      "eval_steps_per_second": 6.215,
      "step": 20500
    },
    {
      "epoch": 48.44290657439446,
      "grad_norm": 3.5627315044403076,
      "learning_rate": 0.00016666666666666666,
      "loss": 0.0949,
      "step": 21000
    },
    {
      "epoch": 48.44290657439446,
      "eval_accuracy": 0.7923633641128026,
      "eval_f1_macro": 0.6493884652975611,
      "eval_loss": 1.255406379699707,
      "eval_runtime": 37.0454,
      "eval_samples_per_second": 216.329,
      "eval_steps_per_second": 6.775,
      "step": 21000
    },
    {
      "epoch": 49.596309111880046,
      "grad_norm": 2.2700695991516113,
      "learning_rate": 0.0001574074074074074,
      "loss": 0.0822,
      "step": 21500
    },
    {
      "epoch": 49.596309111880046,
      "eval_accuracy": 0.7922385824806588,
      "eval_f1_macro": 0.6543829429647029,
      "eval_loss": 1.2859455347061157,
      "eval_runtime": 36.8794,
      "eval_samples_per_second": 217.303,
      "eval_steps_per_second": 6.806,
      "step": 21500
    },
    {
      "epoch": 50.74971164936563,
      "grad_norm": 2.4785401821136475,
      "learning_rate": 0.00014814814814814815,
      "loss": 0.0794,
      "step": 22000
    },
    {
      "epoch": 50.74971164936563,
      "eval_accuracy": 0.7938607436985275,
      "eval_f1_macro": 0.6632807007483084,
      "eval_loss": 1.3257797956466675,
      "eval_runtime": 65.9338,
      "eval_samples_per_second": 121.546,
      "eval_steps_per_second": 3.807,
      "step": 22000
    },
    {
      "epoch": 51.90311418685121,
      "grad_norm": 5.8285675048828125,
      "learning_rate": 0.0001388888888888889,
      "loss": 0.071,
      "step": 22500
    },
    {
      "epoch": 51.90311418685121,
      "eval_accuracy": 0.7877464437234839,
      "eval_f1_macro": 0.6514161143660198,
      "eval_loss": 1.3142781257629395,
      "eval_runtime": 63.1498,
      "eval_samples_per_second": 126.905,
      "eval_steps_per_second": 3.975,
      "step": 22500
    },
    {
      "epoch": 53.056516724336795,
      "grad_norm": 2.3869199752807617,
      "learning_rate": 0.00012962962962962963,
      "loss": 0.0632,
      "step": 23000
    },
    {
      "epoch": 53.056516724336795,
      "eval_accuracy": 0.7967307212378338,
      "eval_f1_macro": 0.6650145750564478,
      "eval_loss": 1.2448372840881348,
      "eval_runtime": 34.9343,
      "eval_samples_per_second": 229.402,
      "eval_steps_per_second": 7.185,
      "step": 23000
    },
    {
      "epoch": 54.20991926182238,
      "grad_norm": 1.7126274108886719,
      "learning_rate": 0.00012037037037037036,
      "loss": 0.0592,
      "step": 23500
    },
    {
      "epoch": 54.20991926182238,
      "eval_accuracy": 0.8083354130272024,
      "eval_f1_macro": 0.653147976938688,
      "eval_loss": 1.273723840713501,
      "eval_runtime": 35.1779,
      "eval_samples_per_second": 227.813,
      "eval_steps_per_second": 7.135,
      "step": 23500
    },
    {
      "epoch": 55.36332179930796,
      "grad_norm": 1.4396089315414429,
      "learning_rate": 0.0001111111111111111,
      "loss": 0.0551,
      "step": 24000
    },
    {
      "epoch": 55.36332179930796,
      "eval_accuracy": 0.8022211130521587,
      "eval_f1_macro": 0.6674628853192668,
      "eval_loss": 1.289405107498169,
      "eval_runtime": 35.37,
      "eval_samples_per_second": 226.576,
      "eval_steps_per_second": 7.096,
      "step": 24000
    },
    {
      "epoch": 56.516724336793544,
      "grad_norm": 2.4105160236358643,
      "learning_rate": 0.00010185185185185185,
      "loss": 0.0495,
      "step": 24500
    },
    {
      "epoch": 56.516724336793544,
      "eval_accuracy": 0.803094584477165,
      "eval_f1_macro": 0.6638655285331525,
      "eval_loss": 1.241363525390625,
      "eval_runtime": 35.1653,
      "eval_samples_per_second": 227.895,
      "eval_steps_per_second": 7.138,
      "step": 24500
    },
    {
      "epoch": 57.67012687427912,
      "grad_norm": 3.4061028957366943,
      "learning_rate": 9.259259259259259e-05,
      "loss": 0.0449,
      "step": 25000
    },
    {
      "epoch": 57.67012687427912,
      "eval_accuracy": 0.8077115048664837,
      "eval_f1_macro": 0.6679687949981502,
      "eval_loss": 1.2535569667816162,
      "eval_runtime": 35.1863,
      "eval_samples_per_second": 227.759,
      "eval_steps_per_second": 7.133,
      "step": 25000
    },
    {
      "epoch": 58.8235294117647,
      "grad_norm": 4.6967363357543945,
      "learning_rate": 8.333333333333333e-05,
      "loss": 0.0412,
      "step": 25500
    },
    {
      "epoch": 58.8235294117647,
      "eval_accuracy": 0.8135762415772398,
      "eval_f1_macro": 0.6707472142083073,
      "eval_loss": 1.2510894536972046,
      "eval_runtime": 35.1709,
      "eval_samples_per_second": 227.859,
      "eval_steps_per_second": 7.137,
      "step": 25500
    },
    {
      "epoch": 59.976931949250286,
      "grad_norm": 1.2903478145599365,
      "learning_rate": 7.407407407407407e-05,
      "loss": 0.0343,
      "step": 26000
    },
    {
      "epoch": 59.976931949250286,
      "eval_accuracy": 0.8100823558772149,
      "eval_f1_macro": 0.6867023078289406,
      "eval_loss": 1.326628565788269,
      "eval_runtime": 35.2142,
      "eval_samples_per_second": 227.579,
      "eval_steps_per_second": 7.128,
      "step": 26000
    },
    {
      "epoch": 61.13033448673587,
      "grad_norm": 4.929038047790527,
      "learning_rate": 6.481481481481482e-05,
      "loss": 0.0325,
      "step": 26500
    },
    {
      "epoch": 61.13033448673587,
      "eval_accuracy": 0.8146992762665336,
      "eval_f1_macro": 0.6813981931163788,
      "eval_loss": 1.2899118661880493,
      "eval_runtime": 35.3037,
      "eval_samples_per_second": 227.002,
      "eval_steps_per_second": 7.11,
      "step": 26500
    },
    {
      "epoch": 62.28373702422145,
      "grad_norm": 0.04143017157912254,
      "learning_rate": 5.555555555555555e-05,
      "loss": 0.0278,
      "step": 27000
    },
    {
      "epoch": 62.28373702422145,
      "eval_accuracy": 0.818941851759421,
      "eval_f1_macro": 0.6660686006005503,
      "eval_loss": 1.2699165344238281,
      "eval_runtime": 35.3107,
      "eval_samples_per_second": 226.957,
      "eval_steps_per_second": 7.108,
      "step": 27000
    },
    {
      "epoch": 63.437139561707035,
      "grad_norm": 1.8004887104034424,
      "learning_rate": 4.6296296296296294e-05,
      "loss": 0.0265,
      "step": 27500
    },
    {
      "epoch": 63.437139561707035,
      "eval_accuracy": 0.8170701272772648,
      "eval_f1_macro": 0.6702275226911916,
      "eval_loss": 1.2782015800476074,
      "eval_runtime": 35.0186,
      "eval_samples_per_second": 228.85,
      "eval_steps_per_second": 7.168,
      "step": 27500
    },
    {
      "epoch": 64.59054209919262,
      "grad_norm": 0.07229655981063843,
      "learning_rate": 3.7037037037037037e-05,
      "loss": 0.0224,
      "step": 28000
    },
    {
      "epoch": 64.59054209919262,
      "eval_accuracy": 0.8225605190915897,
      "eval_f1_macro": 0.6808030174100609,
      "eval_loss": 1.2255030870437622,
      "eval_runtime": 34.8632,
      "eval_samples_per_second": 229.87,
      "eval_steps_per_second": 7.2,
      "step": 28000
    },
    {
      "epoch": 65.7439446366782,
      "grad_norm": 1.0921183824539185,
      "learning_rate": 2.7777777777777776e-05,
      "loss": 0.0182,
      "step": 28500
    },
    {
      "epoch": 65.7439446366782,
      "eval_accuracy": 0.8276765660094834,
      "eval_f1_macro": 0.6838139982176796,
      "eval_loss": 1.2417439222335815,
      "eval_runtime": 34.79,
      "eval_samples_per_second": 230.353,
      "eval_steps_per_second": 7.215,
      "step": 28500
    },
    {
      "epoch": 66.89734717416378,
      "grad_norm": 0.2541353404521942,
      "learning_rate": 1.8518518518518518e-05,
      "loss": 0.0189,
      "step": 29000
    },
    {
      "epoch": 66.89734717416378,
      "eval_accuracy": 0.8255552782630396,
      "eval_f1_macro": 0.6846466970174251,
      "eval_loss": 1.1945445537567139,
      "eval_runtime": 35.2088,
      "eval_samples_per_second": 227.613,
      "eval_steps_per_second": 7.129,
      "step": 29000
    },
    {
      "epoch": 68.05074971164936,
      "grad_norm": 0.49906083941459656,
      "learning_rate": 9.259259259259259e-06,
      "loss": 0.0173,
      "step": 29500
    },
    {
      "epoch": 68.05074971164936,
      "eval_accuracy": 0.8311704517095083,
      "eval_f1_macro": 0.6982812455994792,
      "eval_loss": 1.1982783079147339,
      "eval_runtime": 35.2862,
      "eval_samples_per_second": 227.114,
      "eval_steps_per_second": 7.113,
      "step": 29500
    },
    {
      "epoch": 69.20415224913495,
      "grad_norm": 0.035026829689741135,
      "learning_rate": 0.0,
      "loss": 0.0139,
      "step": 30000
    },
    {
      "epoch": 69.20415224913495,
      "eval_accuracy": 0.8324182680309459,
      "eval_f1_macro": 0.7059020357212862,
      "eval_loss": 1.1972322463989258,
      "eval_runtime": 35.1374,
      "eval_samples_per_second": 228.076,
      "eval_steps_per_second": 7.143,
      "step": 30000
    }
  ],
  "logging_steps": 500,
  "max_steps": 30000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 70,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.7427529644770302e+20,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}