{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 49.99602649006623,
  "global_step": 9400,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.53,
      "learning_rate": 3.7125e-06,
      "loss": 15.287,
      "step": 100
    },
    {
      "epoch": 1.06,
      "learning_rate": 7.4625e-06,
      "loss": 7.6558,
      "step": 200
    },
    {
      "epoch": 1.59,
      "learning_rate": 1.1212499999999998e-05,
      "loss": 4.9409,
      "step": 300
    },
    {
      "epoch": 2.13,
      "learning_rate": 1.49625e-05,
      "loss": 4.2283,
      "step": 400
    },
    {
      "epoch": 2.66,
      "learning_rate": 1.8712499999999997e-05,
      "loss": 3.6871,
      "step": 500
    },
    {
      "epoch": 2.66,
      "eval_loss": 3.5374293327331543,
      "eval_runtime": 140.96,
      "eval_samples_per_second": 19.467,
      "eval_steps_per_second": 2.433,
      "eval_wer": 1.0,
      "step": 500
    },
    {
      "epoch": 3.19,
      "learning_rate": 2.2462499999999997e-05,
      "loss": 3.4073,
      "step": 600
    },
    {
      "epoch": 3.72,
      "learning_rate": 2.6212499999999997e-05,
      "loss": 3.2613,
      "step": 700
    },
    {
      "epoch": 4.25,
      "learning_rate": 2.99625e-05,
      "loss": 3.2504,
      "step": 800
    },
    {
      "epoch": 4.78,
      "learning_rate": 3.37125e-05,
      "loss": 3.1732,
      "step": 900
    },
    {
      "epoch": 5.32,
      "learning_rate": 3.7462499999999996e-05,
      "loss": 3.1501,
      "step": 1000
    },
    {
      "epoch": 5.32,
      "eval_loss": 3.127795934677124,
      "eval_runtime": 140.7753,
      "eval_samples_per_second": 19.492,
      "eval_steps_per_second": 2.437,
      "eval_wer": 1.0,
      "step": 1000
    },
    {
      "epoch": 5.85,
      "learning_rate": 4.12125e-05,
      "loss": 3.0666,
      "step": 1100
    },
    {
      "epoch": 6.38,
      "learning_rate": 4.4962499999999995e-05,
      "loss": 2.7047,
      "step": 1200
    },
    {
      "epoch": 6.91,
      "learning_rate": 4.871249999999999e-05,
      "loss": 2.0895,
      "step": 1300
    },
    {
      "epoch": 7.45,
      "learning_rate": 5.2462499999999994e-05,
      "loss": 1.7459,
      "step": 1400
    },
    {
      "epoch": 7.97,
      "learning_rate": 5.62125e-05,
      "loss": 1.5843,
      "step": 1500
    },
    {
      "epoch": 7.97,
      "eval_loss": 0.6358404755592346,
      "eval_runtime": 138.8212,
      "eval_samples_per_second": 19.766,
      "eval_steps_per_second": 2.471,
      "eval_wer": 0.6914246512691516,
      "step": 1500
    },
    {
      "epoch": 8.51,
      "learning_rate": 5.9962499999999994e-05,
      "loss": 1.5166,
      "step": 1600
    },
    {
      "epoch": 9.04,
      "learning_rate": 6.37125e-05,
      "loss": 1.4555,
      "step": 1700
    },
    {
      "epoch": 9.57,
      "learning_rate": 6.746249999999999e-05,
      "loss": 1.4013,
      "step": 1800
    },
    {
      "epoch": 10.11,
      "learning_rate": 7.121249999999999e-05,
      "loss": 1.3841,
      "step": 1900
    },
    {
      "epoch": 10.64,
      "learning_rate": 7.49625e-05,
      "loss": 1.3378,
      "step": 2000
    },
    {
      "epoch": 10.64,
      "eval_loss": 0.442169189453125,
      "eval_runtime": 140.0078,
      "eval_samples_per_second": 19.599,
      "eval_steps_per_second": 2.45,
      "eval_wer": 0.5924536931168534,
      "step": 2000
    },
    {
      "epoch": 11.17,
      "learning_rate": 7.400675675675676e-05,
      "loss": 1.3249,
      "step": 2100
    },
    {
      "epoch": 11.7,
      "learning_rate": 7.299324324324324e-05,
      "loss": 1.2996,
      "step": 2200
    },
    {
      "epoch": 12.23,
      "learning_rate": 7.197972972972972e-05,
      "loss": 1.2952,
      "step": 2300
    },
    {
      "epoch": 12.76,
      "learning_rate": 7.096621621621621e-05,
      "loss": 1.2626,
      "step": 2400
    },
    {
      "epoch": 13.3,
      "learning_rate": 6.99527027027027e-05,
      "loss": 1.2595,
      "step": 2500
    },
    {
      "epoch": 13.3,
      "eval_loss": 0.3921487033367157,
      "eval_runtime": 140.1536,
      "eval_samples_per_second": 19.579,
      "eval_steps_per_second": 2.447,
      "eval_wer": 0.5511548136290876,
      "step": 2500
    },
    {
      "epoch": 13.83,
      "learning_rate": 6.893918918918919e-05,
      "loss": 1.2282,
      "step": 2600
    },
    {
      "epoch": 14.36,
      "learning_rate": 6.792567567567567e-05,
      "loss": 1.2352,
      "step": 2700
    },
    {
      "epoch": 14.89,
      "learning_rate": 6.691216216216216e-05,
      "loss": 1.2129,
      "step": 2800
    },
    {
      "epoch": 15.42,
      "learning_rate": 6.589864864864864e-05,
      "loss": 1.2184,
      "step": 2900
    },
    {
      "epoch": 15.95,
      "learning_rate": 6.488513513513514e-05,
      "loss": 1.1643,
      "step": 3000
    },
    {
      "epoch": 15.95,
      "eval_loss": 0.35074228048324585,
      "eval_runtime": 142.5452,
      "eval_samples_per_second": 19.25,
      "eval_steps_per_second": 2.406,
      "eval_wer": 0.514932540589984,
      "step": 3000
    },
    {
      "epoch": 16.49,
      "learning_rate": 6.388175675675675e-05,
      "loss": 1.1704,
      "step": 3100
    },
    {
      "epoch": 17.02,
      "learning_rate": 6.287837837837837e-05,
      "loss": 1.1774,
      "step": 3200
    },
    {
      "epoch": 17.55,
      "learning_rate": 6.186486486486485e-05,
      "loss": 1.157,
      "step": 3300
    },
    {
      "epoch": 18.08,
      "learning_rate": 6.085135135135135e-05,
      "loss": 1.1605,
      "step": 3400
    },
    {
      "epoch": 18.61,
      "learning_rate": 5.983783783783783e-05,
      "loss": 1.1352,
      "step": 3500
    },
    {
      "epoch": 18.61,
      "eval_loss": 0.3350585401058197,
      "eval_runtime": 140.4732,
      "eval_samples_per_second": 19.534,
      "eval_steps_per_second": 2.442,
      "eval_wer": 0.5018980105190944,
      "step": 3500
    },
    {
      "epoch": 19.15,
      "learning_rate": 5.882432432432432e-05,
      "loss": 1.1474,
      "step": 3600
    },
    {
      "epoch": 19.68,
      "learning_rate": 5.781081081081081e-05,
      "loss": 1.1273,
      "step": 3700
    },
    {
      "epoch": 20.21,
      "learning_rate": 5.679729729729729e-05,
      "loss": 1.133,
      "step": 3800
    },
    {
      "epoch": 20.74,
      "learning_rate": 5.578378378378378e-05,
      "loss": 1.1112,
      "step": 3900
    },
    {
      "epoch": 21.28,
      "learning_rate": 5.477027027027026e-05,
      "loss": 1.1113,
      "step": 4000
    },
    {
      "epoch": 21.28,
      "eval_loss": 0.3152759373188019,
      "eval_runtime": 139.3947,
      "eval_samples_per_second": 19.685,
      "eval_steps_per_second": 2.461,
      "eval_wer": 0.4845186370912417,
      "step": 4000
    },
    {
      "epoch": 21.81,
      "learning_rate": 5.375675675675675e-05,
      "loss": 1.1029,
      "step": 4100
    },
    {
      "epoch": 22.34,
      "learning_rate": 5.274324324324324e-05,
      "loss": 1.1124,
      "step": 4200
    },
    {
      "epoch": 22.87,
      "learning_rate": 5.172972972972972e-05,
      "loss": 1.091,
      "step": 4300
    },
    {
      "epoch": 23.4,
      "learning_rate": 5.071621621621621e-05,
      "loss": 1.097,
      "step": 4400
    },
    {
      "epoch": 23.93,
      "learning_rate": 4.97027027027027e-05,
      "loss": 1.0914,
      "step": 4500
    },
    {
      "epoch": 23.93,
      "eval_loss": 0.3050296902656555,
      "eval_runtime": 137.2603,
      "eval_samples_per_second": 19.991,
      "eval_steps_per_second": 2.499,
      "eval_wer": 0.4594100160073176,
      "step": 4500
    },
    {
      "epoch": 24.47,
      "learning_rate": 4.8689189189189184e-05,
      "loss": 1.0749,
      "step": 4600
    },
    {
      "epoch": 25.0,
      "learning_rate": 4.767567567567567e-05,
      "loss": 1.0807,
      "step": 4700
    },
    {
      "epoch": 25.53,
      "learning_rate": 4.666216216216216e-05,
      "loss": 1.0647,
      "step": 4800
    },
    {
      "epoch": 26.06,
      "learning_rate": 4.564864864864864e-05,
      "loss": 1.068,
      "step": 4900
    },
    {
      "epoch": 26.59,
      "learning_rate": 4.463513513513513e-05,
      "loss": 1.0468,
      "step": 5000
    },
    {
      "epoch": 26.59,
      "eval_loss": 0.2889558672904968,
      "eval_runtime": 137.6066,
      "eval_samples_per_second": 19.941,
      "eval_steps_per_second": 2.493,
      "eval_wer": 0.44701577864166475,
      "step": 5000
    },
    {
      "epoch": 27.13,
      "learning_rate": 4.3621621621621624e-05,
      "loss": 1.0548,
      "step": 5100
    },
    {
      "epoch": 27.66,
      "learning_rate": 4.26081081081081e-05,
      "loss": 1.0423,
      "step": 5200
    },
    {
      "epoch": 28.19,
      "learning_rate": 4.160472972972973e-05,
      "loss": 1.0436,
      "step": 5300
    },
    {
      "epoch": 28.72,
      "learning_rate": 4.059121621621621e-05,
      "loss": 1.0338,
      "step": 5400
    },
    {
      "epoch": 29.25,
      "learning_rate": 3.95777027027027e-05,
      "loss": 1.0473,
      "step": 5500
    },
    {
      "epoch": 29.25,
      "eval_loss": 0.27552109956741333,
      "eval_runtime": 139.7371,
      "eval_samples_per_second": 19.637,
      "eval_steps_per_second": 2.455,
      "eval_wer": 0.43306654470615136,
      "step": 5500
    },
    {
      "epoch": 29.78,
      "learning_rate": 3.856418918918919e-05,
      "loss": 1.027,
      "step": 5600
    },
    {
      "epoch": 30.32,
      "learning_rate": 3.755067567567568e-05,
      "loss": 1.0418,
      "step": 5700
    },
    {
      "epoch": 30.85,
      "learning_rate": 3.653716216216216e-05,
      "loss": 1.0147,
      "step": 5800
    },
    {
      "epoch": 31.38,
      "learning_rate": 3.552364864864865e-05,
      "loss": 1.0197,
      "step": 5900
    },
    {
      "epoch": 31.91,
      "learning_rate": 3.451013513513513e-05,
      "loss": 1.0065,
      "step": 6000
    },
    {
      "epoch": 31.91,
      "eval_loss": 0.27181389927864075,
      "eval_runtime": 138.4335,
      "eval_samples_per_second": 19.822,
      "eval_steps_per_second": 2.478,
      "eval_wer": 0.42638920649439743,
      "step": 6000
    },
    {
      "epoch": 32.45,
      "learning_rate": 3.3496621621621615e-05,
      "loss": 1.0138,
      "step": 6100
    },
    {
      "epoch": 32.97,
      "learning_rate": 3.2483108108108105e-05,
      "loss": 1.0027,
      "step": 6200
    },
    {
      "epoch": 33.51,
      "learning_rate": 3.146959459459459e-05,
      "loss": 1.0157,
      "step": 6300
    },
    {
      "epoch": 34.04,
      "learning_rate": 3.045608108108108e-05,
      "loss": 1.0034,
      "step": 6400
    },
    {
      "epoch": 34.57,
      "learning_rate": 2.9442567567567563e-05,
      "loss": 0.9794,
      "step": 6500
    },
    {
      "epoch": 34.57,
      "eval_loss": 0.2646064758300781,
      "eval_runtime": 138.3623,
      "eval_samples_per_second": 19.832,
      "eval_steps_per_second": 2.479,
      "eval_wer": 0.41934598673679396,
      "step": 6500
    },
    {
      "epoch": 35.11,
      "learning_rate": 2.8429054054054054e-05,
      "loss": 0.9911,
      "step": 6600
    },
    {
      "epoch": 35.64,
      "learning_rate": 2.7415540540540538e-05,
      "loss": 0.9807,
      "step": 6700
    },
    {
      "epoch": 36.17,
      "learning_rate": 2.6402027027027025e-05,
      "loss": 0.9843,
      "step": 6800
    },
    {
      "epoch": 36.7,
      "learning_rate": 2.538851351351351e-05,
      "loss": 0.9735,
      "step": 6900
    },
    {
      "epoch": 37.23,
      "learning_rate": 2.4375e-05,
      "loss": 0.9849,
      "step": 7000
    },
    {
      "epoch": 37.23,
      "eval_loss": 0.2609545886516571,
      "eval_runtime": 144.7382,
      "eval_samples_per_second": 18.958,
      "eval_steps_per_second": 2.37,
      "eval_wer": 0.4058083695403613,
      "step": 7000
    },
    {
      "epoch": 37.76,
      "learning_rate": 2.3361486486486483e-05,
      "loss": 0.983,
      "step": 7100
    },
    {
      "epoch": 38.3,
      "learning_rate": 2.234797297297297e-05,
      "loss": 0.9775,
      "step": 7200
    },
    {
      "epoch": 38.83,
      "learning_rate": 2.1334459459459458e-05,
      "loss": 0.9597,
      "step": 7300
    },
    {
      "epoch": 39.36,
      "learning_rate": 2.0320945945945945e-05,
      "loss": 0.9606,
      "step": 7400
    },
    {
      "epoch": 39.89,
      "learning_rate": 1.930743243243243e-05,
      "loss": 0.9496,
      "step": 7500
    },
    {
      "epoch": 39.89,
      "eval_loss": 0.2522386610507965,
      "eval_runtime": 139.7036,
      "eval_samples_per_second": 19.642,
      "eval_steps_per_second": 2.455,
      "eval_wer": 0.3984907386233707,
      "step": 7500
    },
    {
      "epoch": 40.42,
      "learning_rate": 1.829391891891892e-05,
      "loss": 0.9581,
      "step": 7600
    },
    {
      "epoch": 40.95,
      "learning_rate": 1.7280405405405403e-05,
      "loss": 0.9477,
      "step": 7700
    },
    {
      "epoch": 41.49,
      "learning_rate": 1.626689189189189e-05,
      "loss": 0.9538,
      "step": 7800
    },
    {
      "epoch": 42.02,
      "learning_rate": 1.5253378378378378e-05,
      "loss": 0.9631,
      "step": 7900
    },
    {
      "epoch": 42.55,
      "learning_rate": 1.4239864864864863e-05,
      "loss": 0.9367,
      "step": 8000
    },
    {
      "epoch": 42.55,
      "eval_loss": 0.25142449140548706,
      "eval_runtime": 138.6762,
      "eval_samples_per_second": 19.787,
      "eval_steps_per_second": 2.473,
      "eval_wer": 0.3946947175851818,
      "step": 8000
    },
    {
      "epoch": 43.08,
      "learning_rate": 1.322635135135135e-05,
      "loss": 0.9498,
      "step": 8100
    },
    {
      "epoch": 43.61,
      "learning_rate": 1.2212837837837838e-05,
      "loss": 0.9389,
      "step": 8200
    },
    {
      "epoch": 44.15,
      "learning_rate": 1.1199324324324323e-05,
      "loss": 0.937,
      "step": 8300
    },
    {
      "epoch": 44.68,
      "learning_rate": 1.0195945945945945e-05,
      "loss": 0.9394,
      "step": 8400
    },
    {
      "epoch": 45.21,
      "learning_rate": 9.182432432432432e-06,
      "loss": 0.9295,
      "step": 8500
    },
    {
      "epoch": 45.21,
      "eval_loss": 0.24582630395889282,
      "eval_runtime": 140.7119,
      "eval_samples_per_second": 19.501,
      "eval_steps_per_second": 2.438,
      "eval_wer": 0.3883375257260462,
      "step": 8500
    },
    {
      "epoch": 45.74,
      "learning_rate": 8.168918918918917e-06,
      "loss": 0.9244,
      "step": 8600
    },
    {
      "epoch": 46.28,
      "learning_rate": 7.1554054054054045e-06,
      "loss": 0.9273,
      "step": 8700
    },
    {
      "epoch": 46.81,
      "learning_rate": 6.141891891891891e-06,
      "loss": 0.9229,
      "step": 8800
    },
    {
      "epoch": 47.34,
      "learning_rate": 5.128378378378377e-06,
      "loss": 0.9392,
      "step": 8900
    },
    {
      "epoch": 47.87,
      "learning_rate": 4.1148648648648645e-06,
      "loss": 0.9187,
      "step": 9000
    },
    {
      "epoch": 47.87,
      "eval_loss": 0.24390903115272522,
      "eval_runtime": 139.6323,
      "eval_samples_per_second": 19.652,
      "eval_steps_per_second": 2.456,
      "eval_wer": 0.3832609192773839,
      "step": 9000
    },
    {
      "epoch": 48.4,
      "learning_rate": 3.1013513513513513e-06,
      "loss": 0.9194,
      "step": 9100
    },
    {
      "epoch": 48.93,
      "learning_rate": 2.0878378378378376e-06,
      "loss": 0.9234,
      "step": 9200
    },
    {
      "epoch": 49.47,
      "learning_rate": 1.0743243243243242e-06,
      "loss": 0.9257,
      "step": 9300
    },
    {
      "epoch": 50.0,
      "learning_rate": 6.08108108108108e-08,
      "loss": 0.9097,
      "step": 9400
    },
    {
      "epoch": 50.0,
      "step": 9400,
      "total_flos": 5.3948756860365595e+19,
      "train_loss": 1.5738848333155855,
      "train_runtime": 24806.553,
      "train_samples_per_second": 12.162,
      "train_steps_per_second": 0.379
    }
  ],
  "max_steps": 9400,
  "num_train_epochs": 50,
  "total_flos": 5.3948756860365595e+19,
  "trial_name": null,
  "trial_params": null
}