{
  "best_metric": 0.8415273271774395,
  "best_model_checkpoint": "results_retain/facebook/hubert-base-ls960/42/checkpoint-30000",
  "epoch": 69.20415224913495,
  "eval_steps": 1000,
  "global_step": 30000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.1534025374855825,
      "grad_norm": 2.9732823371887207,
      "learning_rate": 8.333333333333333e-05,
      "loss": 3.9827,
      "step": 500
    },
    {
      "epoch": 2.306805074971165,
      "grad_norm": 3.6868040561676025,
      "learning_rate": 0.00016666666666666666,
      "loss": 3.3559,
      "step": 1000
    },
    {
      "epoch": 2.306805074971165,
      "eval_accuracy": 0.33953082106313953,
      "eval_f1_macro": 0.12928496278744922,
      "eval_loss": 2.5730652809143066,
      "eval_runtime": 35.334,
      "eval_samples_per_second": 226.807,
      "eval_steps_per_second": 7.104,
      "step": 1000
    },
    {
      "epoch": 3.4602076124567476,
      "grad_norm": 7.014188766479492,
      "learning_rate": 0.00025,
      "loss": 2.1949,
      "step": 1500
    },
    {
      "epoch": 4.61361014994233,
      "grad_norm": 6.496627330780029,
      "learning_rate": 0.0003333333333333333,
      "loss": 1.6389,
      "step": 2000
    },
    {
      "epoch": 4.61361014994233,
      "eval_accuracy": 0.6416271524831545,
      "eval_f1_macro": 0.4299797469182877,
      "eval_loss": 1.4779495000839233,
      "eval_runtime": 35.3694,
      "eval_samples_per_second": 226.58,
      "eval_steps_per_second": 7.097,
      "step": 2000
    },
    {
      "epoch": 5.767012687427912,
      "grad_norm": 7.616945266723633,
      "learning_rate": 0.0004166666666666667,
      "loss": 1.4162,
      "step": 2500
    },
    {
      "epoch": 6.920415224913495,
      "grad_norm": 8.488947868347168,
      "learning_rate": 0.0005,
      "loss": 1.3587,
      "step": 3000
    },
    {
      "epoch": 6.920415224913495,
      "eval_accuracy": 0.6574744197654105,
      "eval_f1_macro": 0.4595491108495356,
      "eval_loss": 1.427338719367981,
      "eval_runtime": 72.7464,
      "eval_samples_per_second": 110.164,
      "eval_steps_per_second": 3.45,
      "step": 3000
    },
    {
      "epoch": 8.073817762399077,
      "grad_norm": 8.156586647033691,
      "learning_rate": 0.0004907407407407408,
      "loss": 1.3027,
      "step": 3500
    },
    {
      "epoch": 9.22722029988466,
      "grad_norm": 6.934875965118408,
      "learning_rate": 0.00048148148148148144,
      "loss": 1.1695,
      "step": 4000
    },
    {
      "epoch": 9.22722029988466,
      "eval_accuracy": 0.6961567257299726,
      "eval_f1_macro": 0.5353451961331095,
      "eval_loss": 1.2435524463653564,
      "eval_runtime": 70.2906,
      "eval_samples_per_second": 114.012,
      "eval_steps_per_second": 3.571,
      "step": 4000
    },
    {
      "epoch": 10.380622837370241,
      "grad_norm": 7.151013374328613,
      "learning_rate": 0.00047222222222222224,
      "loss": 1.0695,
      "step": 4500
    },
    {
      "epoch": 11.534025374855824,
      "grad_norm": 7.771185874938965,
      "learning_rate": 0.000462962962962963,
      "loss": 0.9787,
      "step": 5000
    },
    {
      "epoch": 11.534025374855824,
      "eval_accuracy": 0.6901672073870726,
      "eval_f1_macro": 0.5134333803516367,
      "eval_loss": 1.3313419818878174,
      "eval_runtime": 71.0472,
      "eval_samples_per_second": 112.798,
      "eval_steps_per_second": 3.533,
      "step": 5000
    },
    {
      "epoch": 12.687427912341407,
      "grad_norm": 6.754736423492432,
      "learning_rate": 0.0004537037037037037,
      "loss": 0.9056,
      "step": 5500
    },
    {
      "epoch": 13.84083044982699,
      "grad_norm": 6.322958946228027,
      "learning_rate": 0.0004444444444444444,
      "loss": 0.836,
      "step": 6000
    },
    {
      "epoch": 13.84083044982699,
      "eval_accuracy": 0.7138757174943848,
      "eval_f1_macro": 0.5729793747297807,
      "eval_loss": 1.2415224313735962,
      "eval_runtime": 73.4693,
      "eval_samples_per_second": 109.08,
      "eval_steps_per_second": 3.416,
      "step": 6000
    },
    {
      "epoch": 14.994232987312571,
      "grad_norm": 7.2826619148254395,
      "learning_rate": 0.0004351851851851852,
      "loss": 0.7867,
      "step": 6500
    },
    {
      "epoch": 16.147635524798154,
      "grad_norm": 5.9969482421875,
      "learning_rate": 0.00042592592592592595,
      "loss": 0.7135,
      "step": 7000
    },
    {
      "epoch": 16.147635524798154,
      "eval_accuracy": 0.7389568255552783,
      "eval_f1_macro": 0.5793519253003285,
      "eval_loss": 1.1902633905410767,
      "eval_runtime": 73.1754,
      "eval_samples_per_second": 109.518,
      "eval_steps_per_second": 3.43,
      "step": 7000
    },
    {
      "epoch": 17.301038062283737,
      "grad_norm": 6.838934421539307,
      "learning_rate": 0.0004166666666666667,
      "loss": 0.6719,
      "step": 7500
    },
    {
      "epoch": 18.45444059976932,
      "grad_norm": 7.784801006317139,
      "learning_rate": 0.0004074074074074074,
      "loss": 0.6009,
      "step": 8000
    },
    {
      "epoch": 18.45444059976932,
      "eval_accuracy": 0.7414524581981532,
      "eval_f1_macro": 0.6081990369390977,
      "eval_loss": 1.2159614562988281,
      "eval_runtime": 55.5881,
      "eval_samples_per_second": 144.168,
      "eval_steps_per_second": 4.515,
      "step": 8000
    },
    {
      "epoch": 19.607843137254903,
      "grad_norm": 6.171660423278809,
      "learning_rate": 0.0003981481481481481,
      "loss": 0.5756,
      "step": 8500
    },
    {
      "epoch": 20.761245674740483,
      "grad_norm": 7.959474563598633,
      "learning_rate": 0.0003888888888888889,
      "loss": 0.5355,
      "step": 9000
    },
    {
      "epoch": 20.761245674740483,
      "eval_accuracy": 0.7543049663089593,
      "eval_f1_macro": 0.5947199785680519,
      "eval_loss": 1.1460059881210327,
      "eval_runtime": 55.0321,
      "eval_samples_per_second": 145.624,
      "eval_steps_per_second": 4.561,
      "step": 9000
    },
    {
      "epoch": 21.914648212226066,
      "grad_norm": 7.287164211273193,
      "learning_rate": 0.00037962962962962966,
      "loss": 0.5046,
      "step": 9500
    },
    {
      "epoch": 23.06805074971165,
      "grad_norm": 6.368403434753418,
      "learning_rate": 0.00037037037037037035,
      "loss": 0.4737,
      "step": 10000
    },
    {
      "epoch": 23.06805074971165,
      "eval_accuracy": 0.759920139755428,
      "eval_f1_macro": 0.6214937740706044,
      "eval_loss": 1.1644535064697266,
      "eval_runtime": 54.1187,
      "eval_samples_per_second": 148.082,
      "eval_steps_per_second": 4.638,
      "step": 10000
    },
    {
      "epoch": 24.22145328719723,
      "grad_norm": 9.294144630432129,
      "learning_rate": 0.0003611111111111111,
      "loss": 0.4349,
      "step": 10500
    },
    {
      "epoch": 25.374855824682815,
      "grad_norm": 7.4235310554504395,
      "learning_rate": 0.0003518518518518519,
      "loss": 0.4352,
      "step": 11000
    },
    {
      "epoch": 25.374855824682815,
      "eval_accuracy": 0.7545545295732469,
      "eval_f1_macro": 0.5917892398903293,
      "eval_loss": 1.213472843170166,
      "eval_runtime": 54.2317,
      "eval_samples_per_second": 147.773,
      "eval_steps_per_second": 4.628,
      "step": 11000
    },
    {
      "epoch": 26.528258362168398,
      "grad_norm": 5.937560558319092,
      "learning_rate": 0.00034259259259259263,
      "loss": 0.4017,
      "step": 11500
    },
    {
      "epoch": 27.68166089965398,
      "grad_norm": 6.036593914031982,
      "learning_rate": 0.0003333333333333333,
      "loss": 0.3652,
      "step": 12000
    },
    {
      "epoch": 27.68166089965398,
      "eval_accuracy": 0.7732717743948091,
      "eval_f1_macro": 0.6375373960767734,
      "eval_loss": 1.1644330024719238,
      "eval_runtime": 75.4416,
      "eval_samples_per_second": 106.228,
      "eval_steps_per_second": 3.327,
      "step": 12000
    },
    {
      "epoch": 28.83506343713956,
      "grad_norm": 6.821892738342285,
      "learning_rate": 0.00032407407407407406,
      "loss": 0.3443,
      "step": 12500
    },
    {
      "epoch": 29.988465974625143,
      "grad_norm": 4.1507463455200195,
      "learning_rate": 0.0003148148148148148,
      "loss": 0.3246,
      "step": 13000
    },
    {
      "epoch": 29.988465974625143,
      "eval_accuracy": 0.7776391315198403,
      "eval_f1_macro": 0.6181477901694947,
      "eval_loss": 1.143282175064087,
      "eval_runtime": 75.3981,
      "eval_samples_per_second": 106.289,
      "eval_steps_per_second": 3.329,
      "step": 13000
    },
    {
      "epoch": 31.141868512110726,
      "grad_norm": 7.311563491821289,
      "learning_rate": 0.0003055555555555556,
      "loss": 0.3082,
      "step": 13500
    },
    {
      "epoch": 32.29527104959631,
      "grad_norm": 2.214399576187134,
      "learning_rate": 0.0002962962962962963,
      "loss": 0.2876,
      "step": 14000
    },
    {
      "epoch": 32.29527104959631,
      "eval_accuracy": 0.7700274519590716,
      "eval_f1_macro": 0.6278465966595438,
      "eval_loss": 1.2212963104248047,
      "eval_runtime": 73.2438,
      "eval_samples_per_second": 109.415,
      "eval_steps_per_second": 3.427,
      "step": 14000
    },
    {
      "epoch": 33.44867358708189,
      "grad_norm": 5.876758575439453,
      "learning_rate": 0.00028703703703703703,
      "loss": 0.2722,
      "step": 14500
    },
    {
      "epoch": 34.602076124567475,
      "grad_norm": 3.34192156791687,
      "learning_rate": 0.0002777777777777778,
      "loss": 0.2539,
      "step": 15000
    },
    {
      "epoch": 34.602076124567475,
      "eval_accuracy": 0.7858747192413277,
      "eval_f1_macro": 0.6310309906248334,
      "eval_loss": 1.1600251197814941,
      "eval_runtime": 55.43,
      "eval_samples_per_second": 144.579,
      "eval_steps_per_second": 4.528,
      "step": 15000
    },
    {
      "epoch": 35.75547866205306,
      "grad_norm": 4.611924648284912,
      "learning_rate": 0.0002685185185185186,
      "loss": 0.2428,
      "step": 15500
    },
    {
      "epoch": 36.90888119953864,
      "grad_norm": 3.3283474445343018,
      "learning_rate": 0.00025925925925925926,
      "loss": 0.2322,
      "step": 16000
    },
    {
      "epoch": 36.90888119953864,
      "eval_accuracy": 0.7816321437484403,
      "eval_f1_macro": 0.6319203799590871,
      "eval_loss": 1.135780930519104,
      "eval_runtime": 63.9861,
      "eval_samples_per_second": 125.246,
      "eval_steps_per_second": 3.923,
      "step": 16000
    },
    {
      "epoch": 38.062283737024224,
      "grad_norm": 6.641352653503418,
      "learning_rate": 0.00025,
      "loss": 0.2146,
      "step": 16500
    },
    {
      "epoch": 39.21568627450981,
      "grad_norm": 6.247890949249268,
      "learning_rate": 0.00024074074074074072,
      "loss": 0.2003,
      "step": 17000
    },
    {
      "epoch": 39.21568627450981,
      "eval_accuracy": 0.7962315947092587,
      "eval_f1_macro": 0.6542244286445125,
      "eval_loss": 1.150564432144165,
      "eval_runtime": 69.2204,
      "eval_samples_per_second": 115.775,
      "eval_steps_per_second": 3.626,
      "step": 17000
    },
    {
      "epoch": 40.36908881199539,
      "grad_norm": 4.363713264465332,
      "learning_rate": 0.0002314814814814815,
      "loss": 0.1947,
      "step": 17500
    },
    {
      "epoch": 41.522491349480966,
      "grad_norm": 3.4260287284851074,
      "learning_rate": 0.0002222222222222222,
      "loss": 0.1794,
      "step": 18000
    },
    {
      "epoch": 41.522491349480966,
      "eval_accuracy": 0.7979785375592713,
      "eval_f1_macro": 0.6796792585107833,
      "eval_loss": 1.1864490509033203,
      "eval_runtime": 71.1001,
      "eval_samples_per_second": 112.714,
      "eval_steps_per_second": 3.53,
      "step": 18000
    },
    {
      "epoch": 42.67589388696655,
      "grad_norm": 2.606008291244507,
      "learning_rate": 0.00021296296296296298,
      "loss": 0.1689,
      "step": 18500
    },
    {
      "epoch": 43.82929642445213,
      "grad_norm": 4.665687084197998,
      "learning_rate": 0.0002037037037037037,
      "loss": 0.1645,
      "step": 19000
    },
    {
      "epoch": 43.82929642445213,
      "eval_accuracy": 0.80059895183429,
      "eval_f1_macro": 0.6667890419585701,
      "eval_loss": 1.2014110088348389,
      "eval_runtime": 74.7112,
      "eval_samples_per_second": 107.266,
      "eval_steps_per_second": 3.36,
      "step": 19000
    },
    {
      "epoch": 44.982698961937714,
      "grad_norm": 3.0378100872039795,
      "learning_rate": 0.00019444444444444446,
      "loss": 0.1602,
      "step": 19500
    },
    {
      "epoch": 46.1361014994233,
      "grad_norm": 5.274627685546875,
      "learning_rate": 0.00018518518518518518,
      "loss": 0.144,
      "step": 20000
    },
    {
      "epoch": 46.1361014994233,
      "eval_accuracy": 0.7989767906164212,
      "eval_f1_macro": 0.6582157335341974,
      "eval_loss": 1.1411352157592773,
      "eval_runtime": 73.2065,
      "eval_samples_per_second": 109.471,
      "eval_steps_per_second": 3.429,
      "step": 20000
    },
    {
      "epoch": 47.28950403690888,
      "grad_norm": 2.336925745010376,
      "learning_rate": 0.00017592592592592595,
      "loss": 0.1368,
      "step": 20500
    },
    {
      "epoch": 48.44290657439446,
      "grad_norm": 2.7309417724609375,
      "learning_rate": 0.00016666666666666666,
      "loss": 0.1298,
      "step": 21000
    },
    {
      "epoch": 48.44290657439446,
      "eval_accuracy": 0.8064636885450461,
      "eval_f1_macro": 0.6782237618476237,
      "eval_loss": 1.1389836072921753,
      "eval_runtime": 73.3032,
      "eval_samples_per_second": 109.327,
      "eval_steps_per_second": 3.424,
      "step": 21000
    },
    {
      "epoch": 49.596309111880046,
      "grad_norm": 2.79067325592041,
      "learning_rate": 0.0001574074074074074,
      "loss": 0.1206,
      "step": 21500
    },
    {
      "epoch": 50.74971164936563,
      "grad_norm": 4.826747417449951,
      "learning_rate": 0.00014814814814814815,
      "loss": 0.1175,
      "step": 22000
    },
    {
      "epoch": 50.74971164936563,
      "eval_accuracy": 0.8068380334414774,
      "eval_f1_macro": 0.6700916407139905,
      "eval_loss": 1.2090946435928345,
      "eval_runtime": 73.8226,
      "eval_samples_per_second": 108.557,
      "eval_steps_per_second": 3.4,
      "step": 22000
    },
    {
      "epoch": 51.90311418685121,
      "grad_norm": 3.403858184814453,
      "learning_rate": 0.0001388888888888889,
      "loss": 0.1021,
      "step": 22500
    },
    {
      "epoch": 53.056516724336795,
      "grad_norm": 5.1802496910095215,
      "learning_rate": 0.00012962962962962963,
      "loss": 0.0977,
      "step": 23000
    },
    {
      "epoch": 53.056516724336795,
      "eval_accuracy": 0.8149488395308211,
      "eval_f1_macro": 0.682806558028361,
      "eval_loss": 1.1759377717971802,
      "eval_runtime": 73.8391,
      "eval_samples_per_second": 108.533,
      "eval_steps_per_second": 3.399,
      "step": 23000
    },
    {
      "epoch": 54.20991926182238,
      "grad_norm": 1.122316837310791,
      "learning_rate": 0.00012037037037037036,
      "loss": 0.0912,
      "step": 23500
    },
    {
      "epoch": 55.36332179930796,
      "grad_norm": 1.1100833415985107,
      "learning_rate": 0.0001111111111111111,
      "loss": 0.0823,
      "step": 24000
    },
    {
      "epoch": 55.36332179930796,
      "eval_accuracy": 0.8166957823808335,
      "eval_f1_macro": 0.7045678569443168,
      "eval_loss": 1.2304565906524658,
      "eval_runtime": 74.7299,
      "eval_samples_per_second": 107.24,
      "eval_steps_per_second": 3.359,
      "step": 24000
    },
    {
      "epoch": 56.516724336793544,
      "grad_norm": 4.60992956161499,
      "learning_rate": 0.00010185185185185185,
      "loss": 0.0873,
      "step": 24500
    },
    {
      "epoch": 57.67012687427912,
      "grad_norm": 5.945472240447998,
      "learning_rate": 9.259259259259259e-05,
      "loss": 0.0767,
      "step": 25000
    },
    {
      "epoch": 57.67012687427912,
      "eval_accuracy": 0.8238083354130272,
      "eval_f1_macro": 0.6889311414034964,
      "eval_loss": 1.231188416481018,
      "eval_runtime": 72.9471,
      "eval_samples_per_second": 109.86,
      "eval_steps_per_second": 3.441,
      "step": 25000
    },
    {
      "epoch": 58.8235294117647,
      "grad_norm": 6.51999044418335,
      "learning_rate": 8.333333333333333e-05,
      "loss": 0.0667,
      "step": 25500
    },
    {
      "epoch": 59.976931949250286,
      "grad_norm": 3.6006715297698975,
      "learning_rate": 7.407407407407407e-05,
      "loss": 0.066,
      "step": 26000
    },
    {
      "epoch": 59.976931949250286,
      "eval_accuracy": 0.8235587721487397,
      "eval_f1_macro": 0.7127282615425515,
      "eval_loss": 1.212782621383667,
      "eval_runtime": 70.4845,
      "eval_samples_per_second": 113.699,
      "eval_steps_per_second": 3.561,
      "step": 26000
    },
    {
      "epoch": 61.13033448673587,
      "grad_norm": 1.102469563484192,
      "learning_rate": 6.481481481481482e-05,
      "loss": 0.0601,
      "step": 26500
    },
    {
      "epoch": 62.28373702422145,
      "grad_norm": 3.476552724838257,
      "learning_rate": 5.555555555555555e-05,
      "loss": 0.0493,
      "step": 27000
    },
    {
      "epoch": 62.28373702422145,
      "eval_accuracy": 0.8310456700773646,
      "eval_f1_macro": 0.7115209260308665,
      "eval_loss": 1.15741765499115,
      "eval_runtime": 74.8395,
      "eval_samples_per_second": 107.083,
      "eval_steps_per_second": 3.354,
      "step": 27000
    },
    {
      "epoch": 63.437139561707035,
      "grad_norm": 0.26378124952316284,
      "learning_rate": 4.6296296296296294e-05,
      "loss": 0.0527,
      "step": 27500
    },
    {
      "epoch": 64.59054209919262,
      "grad_norm": 1.6174193620681763,
      "learning_rate": 3.7037037037037037e-05,
      "loss": 0.0479,
      "step": 28000
    },
    {
      "epoch": 64.59054209919262,
      "eval_accuracy": 0.836785625155977,
      "eval_f1_macro": 0.7171903480510493,
      "eval_loss": 1.1416091918945312,
      "eval_runtime": 72.9274,
      "eval_samples_per_second": 109.89,
      "eval_steps_per_second": 3.442,
      "step": 28000
    },
    {
      "epoch": 65.7439446366782,
      "grad_norm": 1.4499250650405884,
      "learning_rate": 2.7777777777777776e-05,
      "loss": 0.0453,
      "step": 28500
    },
    {
      "epoch": 66.89734717416378,
      "grad_norm": 3.988093614578247,
      "learning_rate": 1.8518518518518518e-05,
      "loss": 0.0389,
      "step": 29000
    },
    {
      "epoch": 66.89734717416378,
      "eval_accuracy": 0.8370351884202646,
      "eval_f1_macro": 0.7212408780468642,
      "eval_loss": 1.1253269910812378,
      "eval_runtime": 53.9632,
      "eval_samples_per_second": 148.509,
      "eval_steps_per_second": 4.651,
      "step": 29000
    },
    {
      "epoch": 68.05074971164936,
      "grad_norm": 2.909609317779541,
      "learning_rate": 9.259259259259259e-06,
      "loss": 0.0433,
      "step": 29500
    },
    {
      "epoch": 69.20415224913495,
      "grad_norm": 2.1668026447296143,
      "learning_rate": 0.0,
      "loss": 0.0343,
      "step": 30000
    },
    {
      "epoch": 69.20415224913495,
      "eval_accuracy": 0.8415273271774395,
      "eval_f1_macro": 0.7162084790077747,
      "eval_loss": 1.1328068971633911,
      "eval_runtime": 73.3281,
      "eval_samples_per_second": 109.29,
      "eval_steps_per_second": 3.423,
      "step": 30000
    }
  ],
  "logging_steps": 500,
  "max_steps": 30000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 70,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.7427529644770302e+20,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}