|
{ |
|
"best_metric": 0.691096901131333, |
|
"best_model_checkpoint": "de-DE/wav2vec2-xls-r-300m/checkpoint-5040", |
|
"epoch": 30.0, |
|
"eval_steps": 100, |
|
"global_step": 5400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.2777777777777778, |
|
"grad_norm": 1.4890042543411255, |
|
"learning_rate": 9.074074074074075e-06, |
|
"loss": 4.091, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.5555555555555556, |
|
"grad_norm": 1.1139342784881592, |
|
"learning_rate": 1.8333333333333333e-05, |
|
"loss": 4.0628, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.8333333333333334, |
|
"grad_norm": 0.7727408409118652, |
|
"learning_rate": 2.7592592592592594e-05, |
|
"loss": 3.9799, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.06443679291687161, |
|
"eval_f1": 0.00205206930041668, |
|
"eval_loss": 3.8087375164031982, |
|
"eval_runtime": 21.0392, |
|
"eval_samples_per_second": 96.629, |
|
"eval_steps_per_second": 1.521, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.1111111111111112, |
|
"grad_norm": 1.1827894449234009, |
|
"learning_rate": 3.685185185185185e-05, |
|
"loss": 3.8573, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.3888888888888888, |
|
"grad_norm": 1.3509910106658936, |
|
"learning_rate": 4.6111111111111115e-05, |
|
"loss": 3.7732, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.6666666666666665, |
|
"grad_norm": 1.5033173561096191, |
|
"learning_rate": 5.537037037037037e-05, |
|
"loss": 3.7526, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.9444444444444444, |
|
"grad_norm": 1.2770198583602905, |
|
"learning_rate": 6.462962962962962e-05, |
|
"loss": 3.7773, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.06443679291687161, |
|
"eval_f1": 0.00205206930041668, |
|
"eval_loss": 3.736783504486084, |
|
"eval_runtime": 21.1812, |
|
"eval_samples_per_second": 95.982, |
|
"eval_steps_per_second": 1.511, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.2222222222222223, |
|
"grad_norm": 1.2693891525268555, |
|
"learning_rate": 7.38888888888889e-05, |
|
"loss": 3.7473, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 1.4083213806152344, |
|
"learning_rate": 8.314814814814815e-05, |
|
"loss": 3.7626, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.7777777777777777, |
|
"grad_norm": 1.625786542892456, |
|
"learning_rate": 9.240740740740741e-05, |
|
"loss": 3.7632, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.06443679291687161, |
|
"eval_f1": 0.00205206930041668, |
|
"eval_loss": 3.734524965286255, |
|
"eval_runtime": 21.1901, |
|
"eval_samples_per_second": 95.941, |
|
"eval_steps_per_second": 1.51, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 3.0555555555555554, |
|
"grad_norm": 1.2629117965698242, |
|
"learning_rate": 9.981481481481481e-05, |
|
"loss": 3.7604, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 3.3333333333333335, |
|
"grad_norm": 1.164554476737976, |
|
"learning_rate": 9.878600823045268e-05, |
|
"loss": 3.7564, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 3.611111111111111, |
|
"grad_norm": 0.9783994555473328, |
|
"learning_rate": 9.775720164609054e-05, |
|
"loss": 3.7596, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 3.888888888888889, |
|
"grad_norm": 1.4372780323028564, |
|
"learning_rate": 9.672839506172841e-05, |
|
"loss": 3.7706, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.06443679291687161, |
|
"eval_f1": 0.00205206930041668, |
|
"eval_loss": 3.737985372543335, |
|
"eval_runtime": 20.8407, |
|
"eval_samples_per_second": 97.55, |
|
"eval_steps_per_second": 1.535, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 4.166666666666667, |
|
"grad_norm": 1.2122852802276611, |
|
"learning_rate": 9.569958847736626e-05, |
|
"loss": 3.7516, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 4.444444444444445, |
|
"grad_norm": 1.3332332372665405, |
|
"learning_rate": 9.467078189300412e-05, |
|
"loss": 3.735, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 4.722222222222222, |
|
"grad_norm": 0.9196431040763855, |
|
"learning_rate": 9.364197530864199e-05, |
|
"loss": 3.7224, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 1.3251221179962158, |
|
"learning_rate": 9.261316872427984e-05, |
|
"loss": 3.7518, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.09296606000983768, |
|
"eval_f1": 0.0059176939246660745, |
|
"eval_loss": 3.6887905597686768, |
|
"eval_runtime": 21.2519, |
|
"eval_samples_per_second": 95.662, |
|
"eval_steps_per_second": 1.506, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 5.277777777777778, |
|
"grad_norm": 1.470018982887268, |
|
"learning_rate": 9.15843621399177e-05, |
|
"loss": 3.7136, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 5.555555555555555, |
|
"grad_norm": 1.4413223266601562, |
|
"learning_rate": 9.055555555555556e-05, |
|
"loss": 3.6818, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 5.833333333333333, |
|
"grad_norm": 1.2150436639785767, |
|
"learning_rate": 8.952674897119342e-05, |
|
"loss": 3.699, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.09936055090998525, |
|
"eval_f1": 0.0073766292774243535, |
|
"eval_loss": 3.6655001640319824, |
|
"eval_runtime": 20.9819, |
|
"eval_samples_per_second": 96.893, |
|
"eval_steps_per_second": 1.525, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 6.111111111111111, |
|
"grad_norm": 1.4290884733200073, |
|
"learning_rate": 8.849794238683128e-05, |
|
"loss": 3.6581, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 6.388888888888889, |
|
"grad_norm": 1.6972917318344116, |
|
"learning_rate": 8.746913580246914e-05, |
|
"loss": 3.6401, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 6.666666666666667, |
|
"grad_norm": 1.3264811038970947, |
|
"learning_rate": 8.644032921810699e-05, |
|
"loss": 3.6843, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 6.944444444444445, |
|
"grad_norm": 1.6939361095428467, |
|
"learning_rate": 8.541152263374486e-05, |
|
"loss": 3.6017, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.11116576487948844, |
|
"eval_f1": 0.007963237845435386, |
|
"eval_loss": 3.613088846206665, |
|
"eval_runtime": 21.1422, |
|
"eval_samples_per_second": 96.158, |
|
"eval_steps_per_second": 1.514, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 7.222222222222222, |
|
"grad_norm": 1.3342170715332031, |
|
"learning_rate": 8.438271604938272e-05, |
|
"loss": 3.5671, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"grad_norm": 2.0039055347442627, |
|
"learning_rate": 8.335390946502059e-05, |
|
"loss": 3.563, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 7.777777777777778, |
|
"grad_norm": 1.9838131666183472, |
|
"learning_rate": 8.232510288065844e-05, |
|
"loss": 3.4976, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.12297097884899164, |
|
"eval_f1": 0.0207489285319609, |
|
"eval_loss": 3.5292012691497803, |
|
"eval_runtime": 21.1877, |
|
"eval_samples_per_second": 95.952, |
|
"eval_steps_per_second": 1.51, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 8.055555555555555, |
|
"grad_norm": 1.7923851013183594, |
|
"learning_rate": 8.12962962962963e-05, |
|
"loss": 3.5203, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 8.333333333333334, |
|
"grad_norm": 2.5529909133911133, |
|
"learning_rate": 8.026748971193417e-05, |
|
"loss": 3.4522, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 8.61111111111111, |
|
"grad_norm": 1.8508727550506592, |
|
"learning_rate": 7.925925925925926e-05, |
|
"loss": 3.418, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 8.88888888888889, |
|
"grad_norm": 2.031550407409668, |
|
"learning_rate": 7.823045267489712e-05, |
|
"loss": 3.379, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.14805705853418594, |
|
"eval_f1": 0.030926614730120143, |
|
"eval_loss": 3.388072967529297, |
|
"eval_runtime": 21.1544, |
|
"eval_samples_per_second": 96.103, |
|
"eval_steps_per_second": 1.513, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 9.166666666666666, |
|
"grad_norm": 2.7599411010742188, |
|
"learning_rate": 7.720164609053499e-05, |
|
"loss": 3.3362, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 9.444444444444445, |
|
"grad_norm": 3.2844510078430176, |
|
"learning_rate": 7.617283950617285e-05, |
|
"loss": 3.3089, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 9.722222222222221, |
|
"grad_norm": 3.1966094970703125, |
|
"learning_rate": 7.516460905349794e-05, |
|
"loss": 3.2753, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 3.316312074661255, |
|
"learning_rate": 7.413580246913581e-05, |
|
"loss": 3.2182, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.19527791441219872, |
|
"eval_f1": 0.04058892684496088, |
|
"eval_loss": 3.1913669109344482, |
|
"eval_runtime": 20.9617, |
|
"eval_samples_per_second": 96.986, |
|
"eval_steps_per_second": 1.527, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 10.277777777777779, |
|
"grad_norm": 3.078585624694824, |
|
"learning_rate": 7.310699588477367e-05, |
|
"loss": 3.0993, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 10.555555555555555, |
|
"grad_norm": 6.923484802246094, |
|
"learning_rate": 7.207818930041152e-05, |
|
"loss": 3.0807, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 10.833333333333334, |
|
"grad_norm": 5.272449970245361, |
|
"learning_rate": 7.104938271604938e-05, |
|
"loss": 2.9804, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.2267584849975406, |
|
"eval_f1": 0.05879115082822113, |
|
"eval_loss": 2.93943452835083, |
|
"eval_runtime": 21.2521, |
|
"eval_samples_per_second": 95.661, |
|
"eval_steps_per_second": 1.506, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 11.11111111111111, |
|
"grad_norm": 7.379478931427002, |
|
"learning_rate": 7.002057613168725e-05, |
|
"loss": 2.9518, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 11.38888888888889, |
|
"grad_norm": 4.347708225250244, |
|
"learning_rate": 6.89917695473251e-05, |
|
"loss": 2.8607, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 11.666666666666666, |
|
"grad_norm": 3.146164894104004, |
|
"learning_rate": 6.796296296296296e-05, |
|
"loss": 2.805, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 11.944444444444445, |
|
"grad_norm": 3.2119126319885254, |
|
"learning_rate": 6.693415637860082e-05, |
|
"loss": 2.7577, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.2838170191834727, |
|
"eval_f1": 0.09477911894276655, |
|
"eval_loss": 2.6934072971343994, |
|
"eval_runtime": 21.1897, |
|
"eval_samples_per_second": 95.943, |
|
"eval_steps_per_second": 1.51, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 12.222222222222221, |
|
"grad_norm": 5.8592329025268555, |
|
"learning_rate": 6.590534979423869e-05, |
|
"loss": 2.6594, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"grad_norm": 3.185109853744507, |
|
"learning_rate": 6.487654320987656e-05, |
|
"loss": 2.5666, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 12.777777777777779, |
|
"grad_norm": 6.137824535369873, |
|
"learning_rate": 6.384773662551441e-05, |
|
"loss": 2.5161, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.3320216428922774, |
|
"eval_f1": 0.13001894064725447, |
|
"eval_loss": 2.445955753326416, |
|
"eval_runtime": 21.1936, |
|
"eval_samples_per_second": 95.925, |
|
"eval_steps_per_second": 1.51, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 13.055555555555555, |
|
"grad_norm": 3.701847791671753, |
|
"learning_rate": 6.281893004115227e-05, |
|
"loss": 2.408, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 13.333333333333334, |
|
"grad_norm": 3.357701539993286, |
|
"learning_rate": 6.179012345679012e-05, |
|
"loss": 2.2915, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 13.61111111111111, |
|
"grad_norm": 4.91466760635376, |
|
"learning_rate": 6.0761316872427985e-05, |
|
"loss": 2.2576, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 13.88888888888889, |
|
"grad_norm": 4.737729072570801, |
|
"learning_rate": 5.973251028806585e-05, |
|
"loss": 2.2414, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.3890801770782095, |
|
"eval_f1": 0.1854455337191276, |
|
"eval_loss": 2.230682373046875, |
|
"eval_runtime": 21.2287, |
|
"eval_samples_per_second": 95.767, |
|
"eval_steps_per_second": 1.507, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 14.166666666666666, |
|
"grad_norm": 12.414363861083984, |
|
"learning_rate": 5.8703703703703704e-05, |
|
"loss": 2.125, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 14.444444444444445, |
|
"grad_norm": 4.032175540924072, |
|
"learning_rate": 5.7674897119341566e-05, |
|
"loss": 2.0399, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 14.722222222222221, |
|
"grad_norm": 5.251100540161133, |
|
"learning_rate": 5.664609053497942e-05, |
|
"loss": 2.023, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 4.966403484344482, |
|
"learning_rate": 5.5617283950617284e-05, |
|
"loss": 1.944, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.45204131824889326, |
|
"eval_f1": 0.2558497963614674, |
|
"eval_loss": 2.048044204711914, |
|
"eval_runtime": 21.1364, |
|
"eval_samples_per_second": 96.185, |
|
"eval_steps_per_second": 1.514, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 15.277777777777779, |
|
"grad_norm": 4.946567535400391, |
|
"learning_rate": 5.458847736625514e-05, |
|
"loss": 1.7941, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 15.555555555555555, |
|
"grad_norm": 6.810694217681885, |
|
"learning_rate": 5.355967078189301e-05, |
|
"loss": 1.7366, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 15.833333333333334, |
|
"grad_norm": 7.022438049316406, |
|
"learning_rate": 5.253086419753087e-05, |
|
"loss": 1.7764, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.5081160846040335, |
|
"eval_f1": 0.32205117403665584, |
|
"eval_loss": 1.9176976680755615, |
|
"eval_runtime": 21.2753, |
|
"eval_samples_per_second": 95.557, |
|
"eval_steps_per_second": 1.504, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 16.11111111111111, |
|
"grad_norm": 4.259357929229736, |
|
"learning_rate": 5.150205761316873e-05, |
|
"loss": 1.6464, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 16.38888888888889, |
|
"grad_norm": 3.2829339504241943, |
|
"learning_rate": 5.047325102880659e-05, |
|
"loss": 1.5819, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 16.666666666666668, |
|
"grad_norm": 5.9115986824035645, |
|
"learning_rate": 4.9444444444444446e-05, |
|
"loss": 1.5292, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 16.944444444444443, |
|
"grad_norm": 5.184513092041016, |
|
"learning_rate": 4.841563786008231e-05, |
|
"loss": 1.5396, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.5612395474667978, |
|
"eval_f1": 0.38352153766273633, |
|
"eval_loss": 1.7599705457687378, |
|
"eval_runtime": 21.2693, |
|
"eval_samples_per_second": 95.584, |
|
"eval_steps_per_second": 1.505, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 17.22222222222222, |
|
"grad_norm": 7.147132396697998, |
|
"learning_rate": 4.7386831275720164e-05, |
|
"loss": 1.3781, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 17.5, |
|
"grad_norm": 4.889599800109863, |
|
"learning_rate": 4.635802469135803e-05, |
|
"loss": 1.3611, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 17.77777777777778, |
|
"grad_norm": 4.039378643035889, |
|
"learning_rate": 4.532921810699588e-05, |
|
"loss": 1.3245, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.6040334481062469, |
|
"eval_f1": 0.44146839497484525, |
|
"eval_loss": 1.6344797611236572, |
|
"eval_runtime": 21.2985, |
|
"eval_samples_per_second": 95.453, |
|
"eval_steps_per_second": 1.502, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 18.055555555555557, |
|
"grad_norm": 7.149341583251953, |
|
"learning_rate": 4.4300411522633745e-05, |
|
"loss": 1.3637, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 18.333333333333332, |
|
"grad_norm": 8.064897537231445, |
|
"learning_rate": 4.327160493827161e-05, |
|
"loss": 1.2293, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 18.61111111111111, |
|
"grad_norm": 7.903775691986084, |
|
"learning_rate": 4.224279835390947e-05, |
|
"loss": 1.2622, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 18.88888888888889, |
|
"grad_norm": 4.111396312713623, |
|
"learning_rate": 4.1213991769547326e-05, |
|
"loss": 1.1825, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.6246925725528775, |
|
"eval_f1": 0.4763520182391385, |
|
"eval_loss": 1.5767754316329956, |
|
"eval_runtime": 21.0448, |
|
"eval_samples_per_second": 96.603, |
|
"eval_steps_per_second": 1.521, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 19.166666666666668, |
|
"grad_norm": 6.990283966064453, |
|
"learning_rate": 4.018518518518519e-05, |
|
"loss": 1.1418, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 19.444444444444443, |
|
"grad_norm": 5.295820236206055, |
|
"learning_rate": 3.9156378600823044e-05, |
|
"loss": 1.1469, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 19.72222222222222, |
|
"grad_norm": 4.929720401763916, |
|
"learning_rate": 3.812757201646091e-05, |
|
"loss": 1.0467, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 9.373787879943848, |
|
"learning_rate": 3.709876543209877e-05, |
|
"loss": 1.0525, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.6374815543531727, |
|
"eval_f1": 0.49386444314380973, |
|
"eval_loss": 1.5208549499511719, |
|
"eval_runtime": 21.2234, |
|
"eval_samples_per_second": 95.791, |
|
"eval_steps_per_second": 1.508, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 20.27777777777778, |
|
"grad_norm": 7.043643474578857, |
|
"learning_rate": 3.606995884773663e-05, |
|
"loss": 0.9412, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 20.555555555555557, |
|
"grad_norm": 4.172306537628174, |
|
"learning_rate": 3.504115226337449e-05, |
|
"loss": 0.9267, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 20.833333333333332, |
|
"grad_norm": 7.322742462158203, |
|
"learning_rate": 3.401234567901235e-05, |
|
"loss": 0.9496, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.6586325627151992, |
|
"eval_f1": 0.526846190918778, |
|
"eval_loss": 1.4922317266464233, |
|
"eval_runtime": 21.2176, |
|
"eval_samples_per_second": 95.816, |
|
"eval_steps_per_second": 1.508, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 21.11111111111111, |
|
"grad_norm": 3.450895071029663, |
|
"learning_rate": 3.2983539094650206e-05, |
|
"loss": 0.8906, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 21.38888888888889, |
|
"grad_norm": 4.777499675750732, |
|
"learning_rate": 3.195473251028807e-05, |
|
"loss": 0.895, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 21.666666666666668, |
|
"grad_norm": 6.931682109832764, |
|
"learning_rate": 3.0925925925925924e-05, |
|
"loss": 0.8423, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 21.944444444444443, |
|
"grad_norm": 4.988988876342773, |
|
"learning_rate": 2.989711934156379e-05, |
|
"loss": 0.8512, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.6714215445154943, |
|
"eval_f1": 0.5555183831392889, |
|
"eval_loss": 1.4699825048446655, |
|
"eval_runtime": 20.1301, |
|
"eval_samples_per_second": 100.993, |
|
"eval_steps_per_second": 1.59, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 22.22222222222222, |
|
"grad_norm": 5.021641731262207, |
|
"learning_rate": 2.8888888888888888e-05, |
|
"loss": 0.8724, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 22.5, |
|
"grad_norm": 12.957953453063965, |
|
"learning_rate": 2.786008230452675e-05, |
|
"loss": 0.7648, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 22.77777777777778, |
|
"grad_norm": 11.249091148376465, |
|
"learning_rate": 2.683127572016461e-05, |
|
"loss": 0.7718, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.6709296606000984, |
|
"eval_f1": 0.5596452801153066, |
|
"eval_loss": 1.5005793571472168, |
|
"eval_runtime": 20.1506, |
|
"eval_samples_per_second": 100.89, |
|
"eval_steps_per_second": 1.588, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 23.055555555555557, |
|
"grad_norm": 6.9278788566589355, |
|
"learning_rate": 2.580246913580247e-05, |
|
"loss": 0.7561, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 23.333333333333332, |
|
"grad_norm": 6.076067924499512, |
|
"learning_rate": 2.4773662551440328e-05, |
|
"loss": 0.6688, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 23.61111111111111, |
|
"grad_norm": 9.00234317779541, |
|
"learning_rate": 2.374485596707819e-05, |
|
"loss": 0.6977, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 23.88888888888889, |
|
"grad_norm": 6.011639595031738, |
|
"learning_rate": 2.271604938271605e-05, |
|
"loss": 0.7335, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.6783079193310378, |
|
"eval_f1": 0.5742235106898269, |
|
"eval_loss": 1.4700865745544434, |
|
"eval_runtime": 20.1667, |
|
"eval_samples_per_second": 100.81, |
|
"eval_steps_per_second": 1.587, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 24.166666666666668, |
|
"grad_norm": 6.378417491912842, |
|
"learning_rate": 2.168724279835391e-05, |
|
"loss": 0.7581, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 24.444444444444443, |
|
"grad_norm": 5.407553672790527, |
|
"learning_rate": 2.0658436213991768e-05, |
|
"loss": 0.645, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 24.72222222222222, |
|
"grad_norm": 4.054404258728027, |
|
"learning_rate": 1.962962962962963e-05, |
|
"loss": 0.644, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"grad_norm": 6.489627838134766, |
|
"learning_rate": 1.860082304526749e-05, |
|
"loss": 0.6514, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.6787998032464339, |
|
"eval_f1": 0.5776489069621243, |
|
"eval_loss": 1.5083099603652954, |
|
"eval_runtime": 20.1289, |
|
"eval_samples_per_second": 100.999, |
|
"eval_steps_per_second": 1.59, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 25.27777777777778, |
|
"grad_norm": 4.55994176864624, |
|
"learning_rate": 1.757201646090535e-05, |
|
"loss": 0.6414, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 25.555555555555557, |
|
"grad_norm": 3.7992448806762695, |
|
"learning_rate": 1.654320987654321e-05, |
|
"loss": 0.572, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 25.833333333333332, |
|
"grad_norm": 28.1027774810791, |
|
"learning_rate": 1.551440329218107e-05, |
|
"loss": 0.6161, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.6802754549926218, |
|
"eval_f1": 0.5875122879521407, |
|
"eval_loss": 1.532254695892334, |
|
"eval_runtime": 20.1215, |
|
"eval_samples_per_second": 101.036, |
|
"eval_steps_per_second": 1.59, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 26.11111111111111, |
|
"grad_norm": 6.135079383850098, |
|
"learning_rate": 1.448559670781893e-05, |
|
"loss": 0.6043, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 26.38888888888889, |
|
"grad_norm": 4.171236515045166, |
|
"learning_rate": 1.3477366255144033e-05, |
|
"loss": 0.6035, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 26.666666666666668, |
|
"grad_norm": 5.243923664093018, |
|
"learning_rate": 1.2448559670781894e-05, |
|
"loss": 0.5319, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 26.944444444444443, |
|
"grad_norm": 7.352174282073975, |
|
"learning_rate": 1.1419753086419753e-05, |
|
"loss": 0.5576, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.6822429906542056, |
|
"eval_f1": 0.5893952239065103, |
|
"eval_loss": 1.5246381759643555, |
|
"eval_runtime": 20.121, |
|
"eval_samples_per_second": 101.039, |
|
"eval_steps_per_second": 1.59, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 27.22222222222222, |
|
"grad_norm": 6.190335750579834, |
|
"learning_rate": 1.0390946502057614e-05, |
|
"loss": 0.5641, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 27.5, |
|
"grad_norm": 4.43634557723999, |
|
"learning_rate": 9.362139917695475e-06, |
|
"loss": 0.5445, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 27.77777777777778, |
|
"grad_norm": 3.3575663566589355, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 0.5092, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.691096901131333, |
|
"eval_f1": 0.6042086142382966, |
|
"eval_loss": 1.5102460384368896, |
|
"eval_runtime": 20.1291, |
|
"eval_samples_per_second": 100.998, |
|
"eval_steps_per_second": 1.59, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 28.055555555555557, |
|
"grad_norm": 11.934253692626953, |
|
"learning_rate": 7.304526748971194e-06, |
|
"loss": 0.5371, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 28.333333333333332, |
|
"grad_norm": 5.053288459777832, |
|
"learning_rate": 6.275720164609055e-06, |
|
"loss": 0.498, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 28.61111111111111, |
|
"grad_norm": 4.996842861175537, |
|
"learning_rate": 5.246913580246914e-06, |
|
"loss": 0.5505, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 28.88888888888889, |
|
"grad_norm": 13.71835994720459, |
|
"learning_rate": 4.218106995884774e-06, |
|
"loss": 0.5109, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.6886374815543532, |
|
"eval_f1": 0.600059116410086, |
|
"eval_loss": 1.5202569961547852, |
|
"eval_runtime": 20.2693, |
|
"eval_samples_per_second": 100.299, |
|
"eval_steps_per_second": 1.579, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 29.166666666666668, |
|
"grad_norm": 3.211416721343994, |
|
"learning_rate": 3.1893004115226337e-06, |
|
"loss": 0.5093, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 29.444444444444443, |
|
"grad_norm": 4.839755058288574, |
|
"learning_rate": 2.1604938271604937e-06, |
|
"loss": 0.5158, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 29.72222222222222, |
|
"grad_norm": 5.453362464904785, |
|
"learning_rate": 1.131687242798354e-06, |
|
"loss": 0.522, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"grad_norm": 2.713297128677368, |
|
"learning_rate": 1.0288065843621401e-07, |
|
"loss": 0.5301, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.690605017215937, |
|
"eval_f1": 0.603264981616179, |
|
"eval_loss": 1.5189286470413208, |
|
"eval_runtime": 20.2156, |
|
"eval_samples_per_second": 100.566, |
|
"eval_steps_per_second": 1.583, |
|
"step": 5400 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 5400, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 30, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.0469261495692793e+20, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|