{ "best_metric": 0.8450212128774645, "best_model_checkpoint": "results_spkr/facebook/hubert-base-ls960/42/checkpoint-30000", "epoch": 66.11570247933884, "eval_steps": 1000, "global_step": 30000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.1019283746556474, "grad_norm": 2.1648571491241455, "learning_rate": 8.333333333333333e-05, "loss": 3.9795, "step": 500 }, { "epoch": 2.203856749311295, "grad_norm": 4.377623558044434, "learning_rate": 0.00016666666666666666, "loss": 3.3812, "step": 1000 }, { "epoch": 2.203856749311295, "eval_accuracy": 0.3530072373346643, "eval_f1_macro": 0.12188831557947695, "eval_loss": 2.5676393508911133, "eval_runtime": 34.2451, "eval_samples_per_second": 234.019, "eval_steps_per_second": 7.33, "step": 1000 }, { "epoch": 3.3057851239669422, "grad_norm": 7.630483150482178, "learning_rate": 0.00025, "loss": 2.1911, "step": 1500 }, { "epoch": 4.40771349862259, "grad_norm": 5.876112937927246, "learning_rate": 0.0003333333333333333, "loss": 1.6149, "step": 2000 }, { "epoch": 4.40771349862259, "eval_accuracy": 0.6426254055403045, "eval_f1_macro": 0.43369073962027527, "eval_loss": 1.4151935577392578, "eval_runtime": 34.3436, "eval_samples_per_second": 233.348, "eval_steps_per_second": 7.308, "step": 2000 }, { "epoch": 5.509641873278237, "grad_norm": 7.966612815856934, "learning_rate": 0.0004166666666666667, "loss": 1.4188, "step": 2500 }, { "epoch": 6.6115702479338845, "grad_norm": 8.371646881103516, "learning_rate": 0.0005, "loss": 1.3684, "step": 3000 }, { "epoch": 6.6115702479338845, "eval_accuracy": 0.6482405789867731, "eval_f1_macro": 0.44883222285514607, "eval_loss": 1.4348788261413574, "eval_runtime": 34.2946, "eval_samples_per_second": 233.681, "eval_steps_per_second": 7.319, "step": 3000 }, { "epoch": 7.7134986225895315, "grad_norm": 8.911748886108398, "learning_rate": 0.0004907407407407408, "loss": 1.2929, "step": 3500 }, { "epoch": 8.81542699724518, "grad_norm": 6.721097469329834, "learning_rate": 0.00048148148148148144, "loss": 1.1801, "step": 4000 }, { "epoch": 8.81542699724518, "eval_accuracy": 0.6799351135512852, "eval_f1_macro": 0.5146178480725373, "eval_loss": 1.3011326789855957, "eval_runtime": 34.1837, "eval_samples_per_second": 234.439, "eval_steps_per_second": 7.343, "step": 4000 }, { "epoch": 9.917355371900827, "grad_norm": 5.8066487312316895, "learning_rate": 0.00047222222222222224, "loss": 1.077, "step": 4500 }, { "epoch": 11.019283746556473, "grad_norm": 7.106940269470215, "learning_rate": 0.000462962962962963, "loss": 0.9864, "step": 5000 }, { "epoch": 11.019283746556473, "eval_accuracy": 0.6970301971549788, "eval_f1_macro": 0.5440711094453805, "eval_loss": 1.2665235996246338, "eval_runtime": 34.0648, "eval_samples_per_second": 235.257, "eval_steps_per_second": 7.368, "step": 5000 }, { "epoch": 12.121212121212121, "grad_norm": 7.153928279876709, "learning_rate": 0.0004537037037037037, "loss": 0.8802, "step": 5500 }, { "epoch": 13.223140495867769, "grad_norm": 6.129940986633301, "learning_rate": 0.0004444444444444444, "loss": 0.8463, "step": 6000 }, { "epoch": 13.223140495867769, "eval_accuracy": 0.7228599950087348, "eval_f1_macro": 0.5713293380059555, "eval_loss": 1.2308920621871948, "eval_runtime": 34.3879, "eval_samples_per_second": 233.047, "eval_steps_per_second": 7.299, "step": 6000 }, { "epoch": 14.325068870523417, "grad_norm": 5.996703147888184, "learning_rate": 0.0004351851851851852, "loss": 0.7901, "step": 6500 }, { "epoch": 15.426997245179063, "grad_norm": 5.674803733825684, "learning_rate": 0.00042592592592592595, "loss": 0.7327, "step": 7000 }, { "epoch": 15.426997245179063, "eval_accuracy": 0.7256051909158971, "eval_f1_macro": 0.5774070067259433, "eval_loss": 1.1890807151794434, "eval_runtime": 34.2921, "eval_samples_per_second": 233.698, "eval_steps_per_second": 7.319, "step": 7000 }, { "epoch": 16.52892561983471, "grad_norm": 7.201590061187744, "learning_rate": 0.0004166666666666667, "loss": 0.674, "step": 7500 }, { "epoch": 17.63085399449036, "grad_norm": 5.022010803222656, "learning_rate": 0.0004074074074074074, "loss": 0.621, "step": 8000 }, { "epoch": 17.63085399449036, "eval_accuracy": 0.7473171949089094, "eval_f1_macro": 0.6042671922005475, "eval_loss": 1.1247931718826294, "eval_runtime": 34.2291, "eval_samples_per_second": 234.129, "eval_steps_per_second": 7.333, "step": 8000 }, { "epoch": 18.732782369146005, "grad_norm": 4.878725051879883, "learning_rate": 0.0003981481481481481, "loss": 0.586, "step": 8500 }, { "epoch": 19.834710743801654, "grad_norm": 8.39840316772461, "learning_rate": 0.0003888888888888889, "loss": 0.5424, "step": 9000 }, { "epoch": 19.834710743801654, "eval_accuracy": 0.7626653356625904, "eval_f1_macro": 0.6211572479223255, "eval_loss": 1.1348741054534912, "eval_runtime": 34.3062, "eval_samples_per_second": 233.602, "eval_steps_per_second": 7.316, "step": 9000 }, { "epoch": 20.9366391184573, "grad_norm": 6.210509300231934, "learning_rate": 0.00037962962962962966, "loss": 0.5077, "step": 9500 }, { "epoch": 22.038567493112946, "grad_norm": 6.545920372009277, "learning_rate": 0.00037037037037037035, "loss": 0.4699, "step": 10000 }, { "epoch": 22.038567493112946, "eval_accuracy": 0.7736461192912403, "eval_f1_macro": 0.6325521340296587, "eval_loss": 1.079952359199524, "eval_runtime": 34.2801, "eval_samples_per_second": 233.78, "eval_steps_per_second": 7.322, "step": 10000 }, { "epoch": 23.140495867768596, "grad_norm": 7.236077785491943, "learning_rate": 0.0003611111111111111, "loss": 0.4345, "step": 10500 }, { "epoch": 24.242424242424242, "grad_norm": 5.1988935470581055, "learning_rate": 0.0003518518518518519, "loss": 0.4262, "step": 11000 }, { "epoch": 24.242424242424242, "eval_accuracy": 0.7569253805839781, "eval_f1_macro": 0.6024754626538867, "eval_loss": 1.1676768064498901, "eval_runtime": 34.244, "eval_samples_per_second": 234.026, "eval_steps_per_second": 7.33, "step": 11000 }, { "epoch": 25.34435261707989, "grad_norm": 5.730859279632568, "learning_rate": 0.00034259259259259263, "loss": 0.3863, "step": 11500 }, { "epoch": 26.446280991735538, "grad_norm": 6.015942096710205, "learning_rate": 0.0003333333333333333, "loss": 0.3686, "step": 12000 }, { "epoch": 26.446280991735538, "eval_accuracy": 0.7705265784876466, "eval_f1_macro": 0.6344537073316104, "eval_loss": 1.1093910932540894, "eval_runtime": 34.3537, "eval_samples_per_second": 233.279, "eval_steps_per_second": 7.306, "step": 12000 }, { "epoch": 27.548209366391184, "grad_norm": 3.6527628898620605, "learning_rate": 0.00032407407407407406, "loss": 0.3472, "step": 12500 }, { "epoch": 28.650137741046834, "grad_norm": 7.555639266967773, "learning_rate": 0.0003148148148148148, "loss": 0.3227, "step": 13000 }, { "epoch": 28.650137741046834, "eval_accuracy": 0.7781382580484153, "eval_f1_macro": 0.6653992967508426, "eval_loss": 1.1840604543685913, "eval_runtime": 34.231, "eval_samples_per_second": 234.115, "eval_steps_per_second": 7.333, "step": 13000 }, { "epoch": 29.75206611570248, "grad_norm": 4.181336879730225, "learning_rate": 0.0003055555555555556, "loss": 0.3067, "step": 13500 }, { "epoch": 30.853994490358126, "grad_norm": 2.8446598052978516, "learning_rate": 0.0002962962962962963, "loss": 0.2925, "step": 14000 }, { "epoch": 30.853994490358126, "eval_accuracy": 0.777264786623409, "eval_f1_macro": 0.6360507792019267, "eval_loss": 1.1172969341278076, "eval_runtime": 34.1558, "eval_samples_per_second": 234.631, "eval_steps_per_second": 7.349, "step": 14000 }, { "epoch": 31.955922865013775, "grad_norm": 8.023120880126953, "learning_rate": 0.00028703703703703703, "loss": 0.2787, "step": 14500 }, { "epoch": 33.05785123966942, "grad_norm": 5.497787952423096, "learning_rate": 0.0002777777777777778, "loss": 0.2544, "step": 15000 }, { "epoch": 33.05785123966942, "eval_accuracy": 0.7826303968055902, "eval_f1_macro": 0.6453128691955757, "eval_loss": 1.183886170387268, "eval_runtime": 34.1092, "eval_samples_per_second": 234.951, "eval_steps_per_second": 7.359, "step": 15000 }, { "epoch": 34.15977961432507, "grad_norm": 3.5579934120178223, "learning_rate": 0.0002685185185185186, "loss": 0.2474, "step": 15500 }, { "epoch": 35.26170798898072, "grad_norm": 2.602411985397339, "learning_rate": 0.00025925925925925926, "loss": 0.2328, "step": 16000 }, { "epoch": 35.26170798898072, "eval_accuracy": 0.7882455702520589, "eval_f1_macro": 0.662655022651063, "eval_loss": 1.2161318063735962, "eval_runtime": 34.0994, "eval_samples_per_second": 235.019, "eval_steps_per_second": 7.361, "step": 16000 }, { "epoch": 36.36363636363637, "grad_norm": 4.725533962249756, "learning_rate": 0.00025, "loss": 0.2184, "step": 16500 }, { "epoch": 37.46556473829201, "grad_norm": 4.841740131378174, "learning_rate": 0.00024074074074074072, "loss": 0.1982, "step": 17000 }, { "epoch": 37.46556473829201, "eval_accuracy": 0.8039680559021712, "eval_f1_macro": 0.6843361716262795, "eval_loss": 1.1079691648483276, "eval_runtime": 34.1118, "eval_samples_per_second": 234.934, "eval_steps_per_second": 7.358, "step": 17000 }, { "epoch": 38.56749311294766, "grad_norm": 4.069717884063721, "learning_rate": 0.0002314814814814815, "loss": 0.1953, "step": 17500 }, { "epoch": 39.66942148760331, "grad_norm": 3.4432101249694824, "learning_rate": 0.0002222222222222222, "loss": 0.1857, "step": 18000 }, { "epoch": 39.66942148760331, "eval_accuracy": 0.7961068130771151, "eval_f1_macro": 0.6653878238312634, "eval_loss": 1.1428285837173462, "eval_runtime": 34.0833, "eval_samples_per_second": 235.13, "eval_steps_per_second": 7.364, "step": 18000 }, { "epoch": 40.77134986225895, "grad_norm": 2.9134092330932617, "learning_rate": 0.00021296296296296298, "loss": 0.1761, "step": 18500 }, { "epoch": 41.8732782369146, "grad_norm": 1.9777594804763794, "learning_rate": 0.0002037037037037037, "loss": 0.1674, "step": 19000 }, { "epoch": 41.8732782369146, "eval_accuracy": 0.7997254804092837, "eval_f1_macro": 0.6689441664878049, "eval_loss": 1.1227729320526123, "eval_runtime": 33.8909, "eval_samples_per_second": 236.465, "eval_steps_per_second": 7.406, "step": 19000 }, { "epoch": 42.97520661157025, "grad_norm": 1.4364069700241089, "learning_rate": 0.00019444444444444446, "loss": 0.1563, "step": 19500 }, { "epoch": 44.07713498622589, "grad_norm": 4.286064624786377, "learning_rate": 0.00018518518518518518, "loss": 0.1445, "step": 20000 }, { "epoch": 44.07713498622589, "eval_accuracy": 0.8112053905665086, "eval_f1_macro": 0.6809112936386723, "eval_loss": 1.1481138467788696, "eval_runtime": 33.8788, "eval_samples_per_second": 236.549, "eval_steps_per_second": 7.409, "step": 20000 }, { "epoch": 45.17906336088154, "grad_norm": 5.821794033050537, "learning_rate": 0.00017592592592592595, "loss": 0.1319, "step": 20500 }, { "epoch": 46.28099173553719, "grad_norm": 3.9480347633361816, "learning_rate": 0.00016666666666666666, "loss": 0.1258, "step": 21000 }, { "epoch": 46.28099173553719, "eval_accuracy": 0.8127027701522336, "eval_f1_macro": 0.67815443140521, "eval_loss": 1.074927568435669, "eval_runtime": 34.0974, "eval_samples_per_second": 235.032, "eval_steps_per_second": 7.361, "step": 21000 }, { "epoch": 47.382920110192835, "grad_norm": 3.4662113189697266, "learning_rate": 0.0001574074074074074, "loss": 0.1254, "step": 21500 }, { "epoch": 48.484848484848484, "grad_norm": 0.3864861726760864, "learning_rate": 0.00014814814814814815, "loss": 0.105, "step": 22000 }, { "epoch": 48.484848484848484, "eval_accuracy": 0.822934863988021, "eval_f1_macro": 0.7119269019070541, "eval_loss": 1.0975977182388306, "eval_runtime": 34.0991, "eval_samples_per_second": 235.021, "eval_steps_per_second": 7.361, "step": 22000 }, { "epoch": 49.586776859504134, "grad_norm": 3.2124674320220947, "learning_rate": 0.0001388888888888889, "loss": 0.1095, "step": 22500 }, { "epoch": 50.68870523415978, "grad_norm": 4.872037410736084, "learning_rate": 0.00012962962962962963, "loss": 0.1036, "step": 23000 }, { "epoch": 50.68870523415978, "eval_accuracy": 0.8161966558522585, "eval_f1_macro": 0.7087036359748538, "eval_loss": 1.1118457317352295, "eval_runtime": 34.0917, "eval_samples_per_second": 235.072, "eval_steps_per_second": 7.363, "step": 23000 }, { "epoch": 51.790633608815426, "grad_norm": 4.869659423828125, "learning_rate": 0.00012037037037037036, "loss": 0.0869, "step": 23500 }, { "epoch": 52.892561983471076, "grad_norm": 3.3753907680511475, "learning_rate": 0.0001111111111111111, "loss": 0.0834, "step": 24000 }, { "epoch": 52.892561983471076, "eval_accuracy": 0.8265535313201897, "eval_f1_macro": 0.7253582768488744, "eval_loss": 1.1696484088897705, "eval_runtime": 34.1116, "eval_samples_per_second": 234.935, "eval_steps_per_second": 7.358, "step": 24000 }, { "epoch": 53.99449035812672, "grad_norm": 3.1387767791748047, "learning_rate": 0.00010185185185185185, "loss": 0.0816, "step": 24500 }, { "epoch": 55.09641873278237, "grad_norm": 2.163699150085449, "learning_rate": 9.259259259259259e-05, "loss": 0.0701, "step": 25000 }, { "epoch": 55.09641873278237, "eval_accuracy": 0.8312952333416521, "eval_f1_macro": 0.7321580302363878, "eval_loss": 1.1293047666549683, "eval_runtime": 34.5857, "eval_samples_per_second": 231.714, "eval_steps_per_second": 7.257, "step": 25000 }, { "epoch": 56.19834710743802, "grad_norm": 2.4210057258605957, "learning_rate": 8.333333333333333e-05, "loss": 0.0678, "step": 25500 }, { "epoch": 57.30027548209367, "grad_norm": 3.216862201690674, "learning_rate": 7.407407407407407e-05, "loss": 0.066, "step": 26000 }, { "epoch": 57.30027548209367, "eval_accuracy": 0.8350386823059646, "eval_f1_macro": 0.7214214084728681, "eval_loss": 1.1439422369003296, "eval_runtime": 34.6333, "eval_samples_per_second": 231.396, "eval_steps_per_second": 7.247, "step": 26000 }, { "epoch": 58.40220385674931, "grad_norm": 0.19445660710334778, "learning_rate": 6.481481481481482e-05, "loss": 0.0609, "step": 26500 }, { "epoch": 59.50413223140496, "grad_norm": 0.7729499936103821, "learning_rate": 5.555555555555555e-05, "loss": 0.0568, "step": 27000 }, { "epoch": 59.50413223140496, "eval_accuracy": 0.8364112802595458, "eval_f1_macro": 0.7324611105759018, "eval_loss": 1.1670494079589844, "eval_runtime": 34.1194, "eval_samples_per_second": 234.881, "eval_steps_per_second": 7.357, "step": 27000 }, { "epoch": 60.60606060606061, "grad_norm": 1.3117353916168213, "learning_rate": 4.6296296296296294e-05, "loss": 0.0498, "step": 27500 }, { "epoch": 61.70798898071625, "grad_norm": 1.9914586544036865, "learning_rate": 3.7037037037037037e-05, "loss": 0.0534, "step": 28000 }, { "epoch": 61.70798898071625, "eval_accuracy": 0.840778637384577, "eval_f1_macro": 0.7529182508124644, "eval_loss": 1.107100248336792, "eval_runtime": 34.1776, "eval_samples_per_second": 234.481, "eval_steps_per_second": 7.344, "step": 28000 }, { "epoch": 62.8099173553719, "grad_norm": 0.35563403367996216, "learning_rate": 2.7777777777777776e-05, "loss": 0.0477, "step": 28500 }, { "epoch": 63.91184573002755, "grad_norm": 1.6338849067687988, "learning_rate": 1.8518518518518518e-05, "loss": 0.0409, "step": 29000 }, { "epoch": 63.91184573002755, "eval_accuracy": 0.8443973047167457, "eval_f1_macro": 0.7357533921453874, "eval_loss": 1.125234842300415, "eval_runtime": 34.1121, "eval_samples_per_second": 234.931, "eval_steps_per_second": 7.358, "step": 29000 }, { "epoch": 65.0137741046832, "grad_norm": 2.1069159507751465, "learning_rate": 9.259259259259259e-06, "loss": 0.0375, "step": 29500 }, { "epoch": 66.11570247933884, "grad_norm": 0.2921960949897766, "learning_rate": 0.0, "loss": 0.0388, "step": 30000 }, { "epoch": 66.11570247933884, "eval_accuracy": 0.8450212128774645, "eval_f1_macro": 0.7384390482437365, "eval_loss": 1.1227383613586426, "eval_runtime": 34.379, "eval_samples_per_second": 233.108, "eval_steps_per_second": 7.301, "step": 30000 } ], "logging_steps": 500, "max_steps": 30000, "num_input_tokens_seen": 0, "num_train_epochs": 67, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.7427257222137345e+20, "train_batch_size": 32, "trial_name": null, "trial_params": null }