|
{ |
|
"best_global_step": 41000, |
|
"best_metric": 0.359779867002981, |
|
"best_model_checkpoint": "/home/cluster-dgxa100/slp01/bagas-fine-tune-whisper/whisper-tiny-javanese-openslr-v6/checkpoint-41000", |
|
"epoch": 159.7456, |
|
"eval_steps": 1000, |
|
"global_step": 50000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 25.32911491394043, |
|
"learning_rate": 7.88e-07, |
|
"loss": 3.6275, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.2784, |
|
"grad_norm": 20.188899993896484, |
|
"learning_rate": 1.588e-06, |
|
"loss": 2.0941, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.9184, |
|
"grad_norm": 20.781444549560547, |
|
"learning_rate": 2.3880000000000003e-06, |
|
"loss": 1.6316, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.5568, |
|
"grad_norm": 18.28050994873047, |
|
"learning_rate": 3.188e-06, |
|
"loss": 1.3303, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.1952, |
|
"grad_norm": 19.185575485229492, |
|
"learning_rate": 3.988000000000001e-06, |
|
"loss": 1.1166, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.1952, |
|
"eval_loss": 1.0200245380401611, |
|
"eval_runtime": 143.6337, |
|
"eval_samples_per_second": 4.351, |
|
"eval_steps_per_second": 1.093, |
|
"eval_wer": 0.6975464343040587, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.8352, |
|
"grad_norm": 16.41274070739746, |
|
"learning_rate": 4.7880000000000006e-06, |
|
"loss": 0.9805, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 4.4736, |
|
"grad_norm": 14.031892776489258, |
|
"learning_rate": 5.588e-06, |
|
"loss": 0.8521, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 5.112, |
|
"grad_norm": 14.105754852294922, |
|
"learning_rate": 6.3880000000000005e-06, |
|
"loss": 0.7633, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 5.752, |
|
"grad_norm": 13.716428756713867, |
|
"learning_rate": 7.1880000000000005e-06, |
|
"loss": 0.6676, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 6.3904, |
|
"grad_norm": 12.681669235229492, |
|
"learning_rate": 7.988e-06, |
|
"loss": 0.5644, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 6.3904, |
|
"eval_loss": 0.7341693043708801, |
|
"eval_runtime": 113.3954, |
|
"eval_samples_per_second": 5.512, |
|
"eval_steps_per_second": 1.385, |
|
"eval_wer": 0.5897729878468241, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 7.0288, |
|
"grad_norm": 12.7446928024292, |
|
"learning_rate": 8.788000000000001e-06, |
|
"loss": 0.5326, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 7.6688, |
|
"grad_norm": 12.206501007080078, |
|
"learning_rate": 9.588e-06, |
|
"loss": 0.4346, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 8.3072, |
|
"grad_norm": 9.13988208770752, |
|
"learning_rate": 1.0388e-05, |
|
"loss": 0.3967, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 8.9472, |
|
"grad_norm": 12.071162223815918, |
|
"learning_rate": 1.1188e-05, |
|
"loss": 0.3532, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 9.5856, |
|
"grad_norm": 8.742654800415039, |
|
"learning_rate": 1.1988000000000001e-05, |
|
"loss": 0.2837, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 9.5856, |
|
"eval_loss": 0.666342556476593, |
|
"eval_runtime": 133.4076, |
|
"eval_samples_per_second": 4.685, |
|
"eval_steps_per_second": 1.177, |
|
"eval_wer": 0.5700527401972024, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 10.224, |
|
"grad_norm": 10.311443328857422, |
|
"learning_rate": 1.2788e-05, |
|
"loss": 0.2601, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 10.864, |
|
"grad_norm": 10.367297172546387, |
|
"learning_rate": 1.3588000000000001e-05, |
|
"loss": 0.227, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 11.5024, |
|
"grad_norm": 10.210521697998047, |
|
"learning_rate": 1.4388000000000002e-05, |
|
"loss": 0.1883, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 12.1408, |
|
"grad_norm": 9.120365142822266, |
|
"learning_rate": 1.5188000000000001e-05, |
|
"loss": 0.1716, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 12.7808, |
|
"grad_norm": 8.728864669799805, |
|
"learning_rate": 1.5988e-05, |
|
"loss": 0.1451, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 12.7808, |
|
"eval_loss": 0.669156551361084, |
|
"eval_runtime": 111.1099, |
|
"eval_samples_per_second": 5.625, |
|
"eval_steps_per_second": 1.413, |
|
"eval_wer": 0.615684476037606, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 13.4192, |
|
"grad_norm": 8.055611610412598, |
|
"learning_rate": 1.6788000000000003e-05, |
|
"loss": 0.1277, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 14.0576, |
|
"grad_norm": 10.252028465270996, |
|
"learning_rate": 1.7588e-05, |
|
"loss": 0.1231, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 14.6976, |
|
"grad_norm": 8.062397956848145, |
|
"learning_rate": 1.8388e-05, |
|
"loss": 0.1015, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 15.336, |
|
"grad_norm": 5.604861736297607, |
|
"learning_rate": 1.9188000000000003e-05, |
|
"loss": 0.0966, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 15.975999999999999, |
|
"grad_norm": 5.753713130950928, |
|
"learning_rate": 1.9988000000000002e-05, |
|
"loss": 0.0949, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 15.975999999999999, |
|
"eval_loss": 0.6928630471229553, |
|
"eval_runtime": 104.9987, |
|
"eval_samples_per_second": 5.952, |
|
"eval_steps_per_second": 1.495, |
|
"eval_wer": 0.6140793396010089, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 16.6144, |
|
"grad_norm": 7.435614109039307, |
|
"learning_rate": 1.991288888888889e-05, |
|
"loss": 0.0763, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 17.2528, |
|
"grad_norm": 5.643408298492432, |
|
"learning_rate": 1.9824000000000002e-05, |
|
"loss": 0.0767, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 17.8928, |
|
"grad_norm": 6.685403823852539, |
|
"learning_rate": 1.9735111111111112e-05, |
|
"loss": 0.0695, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 18.5312, |
|
"grad_norm": 5.571532249450684, |
|
"learning_rate": 1.9646222222222223e-05, |
|
"loss": 0.0649, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 19.1696, |
|
"grad_norm": 5.548845291137695, |
|
"learning_rate": 1.9557333333333333e-05, |
|
"loss": 0.0621, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 19.1696, |
|
"eval_loss": 0.7081567049026489, |
|
"eval_runtime": 106.1316, |
|
"eval_samples_per_second": 5.889, |
|
"eval_steps_per_second": 1.479, |
|
"eval_wer": 0.4790185737216235, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 19.8096, |
|
"grad_norm": 5.352997779846191, |
|
"learning_rate": 1.9468444444444444e-05, |
|
"loss": 0.0578, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 20.448, |
|
"grad_norm": 4.763257026672363, |
|
"learning_rate": 1.9379555555555558e-05, |
|
"loss": 0.0521, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 21.0864, |
|
"grad_norm": 2.98003888130188, |
|
"learning_rate": 1.9290666666666668e-05, |
|
"loss": 0.0527, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 21.7264, |
|
"grad_norm": 12.276226043701172, |
|
"learning_rate": 1.920177777777778e-05, |
|
"loss": 0.0456, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 22.3648, |
|
"grad_norm": 4.935642719268799, |
|
"learning_rate": 1.911288888888889e-05, |
|
"loss": 0.0466, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 22.3648, |
|
"eval_loss": 0.7455945611000061, |
|
"eval_runtime": 105.1482, |
|
"eval_samples_per_second": 5.944, |
|
"eval_steps_per_second": 1.493, |
|
"eval_wer": 0.4469158449896813, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 23.0032, |
|
"grad_norm": 4.248728275299072, |
|
"learning_rate": 1.9024000000000003e-05, |
|
"loss": 0.0449, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 23.6432, |
|
"grad_norm": 6.321920394897461, |
|
"learning_rate": 1.8935111111111113e-05, |
|
"loss": 0.0388, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 24.2816, |
|
"grad_norm": 4.33453893661499, |
|
"learning_rate": 1.8846222222222224e-05, |
|
"loss": 0.0412, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 24.9216, |
|
"grad_norm": 8.20227336883545, |
|
"learning_rate": 1.8757333333333334e-05, |
|
"loss": 0.0386, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 25.56, |
|
"grad_norm": 4.103025913238525, |
|
"learning_rate": 1.8668444444444448e-05, |
|
"loss": 0.0338, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 25.56, |
|
"eval_loss": 0.760087251663208, |
|
"eval_runtime": 103.2404, |
|
"eval_samples_per_second": 6.054, |
|
"eval_steps_per_second": 1.521, |
|
"eval_wer": 0.43659711075441415, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 26.1984, |
|
"grad_norm": 3.744155168533325, |
|
"learning_rate": 1.858e-05, |
|
"loss": 0.0356, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 26.8384, |
|
"grad_norm": 3.403186082839966, |
|
"learning_rate": 1.8491111111111112e-05, |
|
"loss": 0.0334, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 27.4768, |
|
"grad_norm": 4.127572536468506, |
|
"learning_rate": 1.8402222222222223e-05, |
|
"loss": 0.0327, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 28.1152, |
|
"grad_norm": 4.966237545013428, |
|
"learning_rate": 1.8313333333333333e-05, |
|
"loss": 0.0309, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 28.7552, |
|
"grad_norm": 6.874843120574951, |
|
"learning_rate": 1.8224444444444447e-05, |
|
"loss": 0.0288, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 28.7552, |
|
"eval_loss": 0.7782142758369446, |
|
"eval_runtime": 101.996, |
|
"eval_samples_per_second": 6.128, |
|
"eval_steps_per_second": 1.539, |
|
"eval_wer": 0.3893602384774134, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 29.3936, |
|
"grad_norm": 2.1933653354644775, |
|
"learning_rate": 1.8135555555555557e-05, |
|
"loss": 0.0275, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 30.032, |
|
"grad_norm": 2.639747381210327, |
|
"learning_rate": 1.8046666666666668e-05, |
|
"loss": 0.027, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 30.672, |
|
"grad_norm": 2.232715368270874, |
|
"learning_rate": 1.7957777777777778e-05, |
|
"loss": 0.0248, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 31.3104, |
|
"grad_norm": 1.4926921129226685, |
|
"learning_rate": 1.7868888888888892e-05, |
|
"loss": 0.0266, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 31.9504, |
|
"grad_norm": 2.4175844192504883, |
|
"learning_rate": 1.7780000000000003e-05, |
|
"loss": 0.0232, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 31.9504, |
|
"eval_loss": 0.79770827293396, |
|
"eval_runtime": 104.7759, |
|
"eval_samples_per_second": 5.965, |
|
"eval_steps_per_second": 1.498, |
|
"eval_wer": 0.41068562256363217, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 32.5888, |
|
"grad_norm": 1.8864479064941406, |
|
"learning_rate": 1.7691111111111113e-05, |
|
"loss": 0.0218, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 33.2272, |
|
"grad_norm": 8.317153930664062, |
|
"learning_rate": 1.7602222222222223e-05, |
|
"loss": 0.0239, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 33.8672, |
|
"grad_norm": 4.491159439086914, |
|
"learning_rate": 1.7513333333333334e-05, |
|
"loss": 0.023, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 34.5056, |
|
"grad_norm": 2.753185272216797, |
|
"learning_rate": 1.7424444444444444e-05, |
|
"loss": 0.0202, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 35.144, |
|
"grad_norm": 2.579869270324707, |
|
"learning_rate": 1.7335555555555558e-05, |
|
"loss": 0.0212, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 35.144, |
|
"eval_loss": 0.797561764717102, |
|
"eval_runtime": 106.2768, |
|
"eval_samples_per_second": 5.881, |
|
"eval_steps_per_second": 1.477, |
|
"eval_wer": 0.41435450584728273, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 35.784, |
|
"grad_norm": 2.2582919597625732, |
|
"learning_rate": 1.724666666666667e-05, |
|
"loss": 0.0193, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 36.4224, |
|
"grad_norm": 2.982079029083252, |
|
"learning_rate": 1.715777777777778e-05, |
|
"loss": 0.0207, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 37.0608, |
|
"grad_norm": 3.09155011177063, |
|
"learning_rate": 1.706888888888889e-05, |
|
"loss": 0.0176, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 37.7008, |
|
"grad_norm": 3.805440902709961, |
|
"learning_rate": 1.6980444444444447e-05, |
|
"loss": 0.0177, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 38.3392, |
|
"grad_norm": 2.447655439376831, |
|
"learning_rate": 1.6891555555555557e-05, |
|
"loss": 0.0178, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 38.3392, |
|
"eval_loss": 0.8184047341346741, |
|
"eval_runtime": 140.2428, |
|
"eval_samples_per_second": 4.457, |
|
"eval_steps_per_second": 1.119, |
|
"eval_wer": 0.4010548039440495, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 38.9792, |
|
"grad_norm": 6.444581031799316, |
|
"learning_rate": 1.6802666666666668e-05, |
|
"loss": 0.018, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 39.6176, |
|
"grad_norm": 1.9832179546356201, |
|
"learning_rate": 1.671377777777778e-05, |
|
"loss": 0.0185, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 40.256, |
|
"grad_norm": 1.374569058418274, |
|
"learning_rate": 1.6624888888888892e-05, |
|
"loss": 0.0159, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 40.896, |
|
"grad_norm": 4.2071309089660645, |
|
"learning_rate": 1.6536000000000002e-05, |
|
"loss": 0.0138, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 41.5344, |
|
"grad_norm": 2.3069539070129395, |
|
"learning_rate": 1.6447111111111113e-05, |
|
"loss": 0.0132, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 41.5344, |
|
"eval_loss": 0.8310635089874268, |
|
"eval_runtime": 138.029, |
|
"eval_samples_per_second": 4.528, |
|
"eval_steps_per_second": 1.137, |
|
"eval_wer": 0.3762898417794084, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 42.1728, |
|
"grad_norm": 1.6480227708816528, |
|
"learning_rate": 1.6358222222222223e-05, |
|
"loss": 0.016, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 42.8128, |
|
"grad_norm": 4.483267784118652, |
|
"learning_rate": 1.6269333333333334e-05, |
|
"loss": 0.0145, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 43.4512, |
|
"grad_norm": 1.1435110569000244, |
|
"learning_rate": 1.6180444444444444e-05, |
|
"loss": 0.0129, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 44.0896, |
|
"grad_norm": 0.6056487560272217, |
|
"learning_rate": 1.6091555555555555e-05, |
|
"loss": 0.015, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 44.7296, |
|
"grad_norm": 7.710687160491943, |
|
"learning_rate": 1.600266666666667e-05, |
|
"loss": 0.0145, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 44.7296, |
|
"eval_loss": 0.8473762273788452, |
|
"eval_runtime": 109.9727, |
|
"eval_samples_per_second": 5.683, |
|
"eval_steps_per_second": 1.428, |
|
"eval_wer": 0.3790415042421463, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 45.368, |
|
"grad_norm": 0.3126671314239502, |
|
"learning_rate": 1.591377777777778e-05, |
|
"loss": 0.0151, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 46.0064, |
|
"grad_norm": 1.5604488849639893, |
|
"learning_rate": 1.582488888888889e-05, |
|
"loss": 0.0126, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 46.6464, |
|
"grad_norm": 1.0523457527160645, |
|
"learning_rate": 1.5736000000000003e-05, |
|
"loss": 0.0124, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 47.2848, |
|
"grad_norm": 2.6398849487304688, |
|
"learning_rate": 1.5647111111111114e-05, |
|
"loss": 0.013, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 47.9248, |
|
"grad_norm": 1.1916667222976685, |
|
"learning_rate": 1.5558222222222224e-05, |
|
"loss": 0.0117, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 47.9248, |
|
"eval_loss": 0.8624815940856934, |
|
"eval_runtime": 105.5676, |
|
"eval_samples_per_second": 5.92, |
|
"eval_steps_per_second": 1.487, |
|
"eval_wer": 0.4155010318734235, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 48.5632, |
|
"grad_norm": 1.593981146812439, |
|
"learning_rate": 1.5469333333333335e-05, |
|
"loss": 0.0115, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 49.2016, |
|
"grad_norm": 2.9825565814971924, |
|
"learning_rate": 1.538044444444445e-05, |
|
"loss": 0.0111, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 49.8416, |
|
"grad_norm": 1.278347134590149, |
|
"learning_rate": 1.529155555555556e-05, |
|
"loss": 0.0109, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 50.48, |
|
"grad_norm": 1.986265778541565, |
|
"learning_rate": 1.5202666666666668e-05, |
|
"loss": 0.0115, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 51.1184, |
|
"grad_norm": 0.23057551681995392, |
|
"learning_rate": 1.5114222222222223e-05, |
|
"loss": 0.0101, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 51.1184, |
|
"eval_loss": 0.8906803727149963, |
|
"eval_runtime": 106.8162, |
|
"eval_samples_per_second": 5.851, |
|
"eval_steps_per_second": 1.47, |
|
"eval_wer": 0.3758312313689521, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 51.7584, |
|
"grad_norm": 2.8394742012023926, |
|
"learning_rate": 1.5025333333333333e-05, |
|
"loss": 0.01, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 52.3968, |
|
"grad_norm": 3.907292366027832, |
|
"learning_rate": 1.4936444444444447e-05, |
|
"loss": 0.0101, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 53.0352, |
|
"grad_norm": 0.3342689871788025, |
|
"learning_rate": 1.4847555555555558e-05, |
|
"loss": 0.0106, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 53.6752, |
|
"grad_norm": 14.257401466369629, |
|
"learning_rate": 1.4758666666666668e-05, |
|
"loss": 0.0096, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 54.3136, |
|
"grad_norm": 4.482171535491943, |
|
"learning_rate": 1.4669777777777779e-05, |
|
"loss": 0.0091, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 54.3136, |
|
"eval_loss": 0.8973252773284912, |
|
"eval_runtime": 107.1216, |
|
"eval_samples_per_second": 5.834, |
|
"eval_steps_per_second": 1.466, |
|
"eval_wer": 0.3999082779179087, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 54.9536, |
|
"grad_norm": 0.3064260184764862, |
|
"learning_rate": 1.458088888888889e-05, |
|
"loss": 0.0089, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 55.592, |
|
"grad_norm": 3.7441632747650146, |
|
"learning_rate": 1.4492000000000001e-05, |
|
"loss": 0.01, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 56.2304, |
|
"grad_norm": 2.9929962158203125, |
|
"learning_rate": 1.4403111111111112e-05, |
|
"loss": 0.0099, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 56.8704, |
|
"grad_norm": 3.1295619010925293, |
|
"learning_rate": 1.4314222222222222e-05, |
|
"loss": 0.0083, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 57.5088, |
|
"grad_norm": 3.032670736312866, |
|
"learning_rate": 1.4225777777777779e-05, |
|
"loss": 0.0087, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 57.5088, |
|
"eval_loss": 0.9277347922325134, |
|
"eval_runtime": 108.6583, |
|
"eval_samples_per_second": 5.752, |
|
"eval_steps_per_second": 1.445, |
|
"eval_wer": 0.4182526943361614, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 58.1472, |
|
"grad_norm": 0.5450573563575745, |
|
"learning_rate": 1.4137333333333334e-05, |
|
"loss": 0.0081, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 58.7872, |
|
"grad_norm": 3.2385668754577637, |
|
"learning_rate": 1.4048444444444445e-05, |
|
"loss": 0.008, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 59.4256, |
|
"grad_norm": 2.490182638168335, |
|
"learning_rate": 1.3959555555555557e-05, |
|
"loss": 0.0074, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 60.064, |
|
"grad_norm": 2.916256904602051, |
|
"learning_rate": 1.3870666666666667e-05, |
|
"loss": 0.009, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 60.704, |
|
"grad_norm": 0.9266663789749146, |
|
"learning_rate": 1.3781777777777778e-05, |
|
"loss": 0.0068, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 60.704, |
|
"eval_loss": 0.9448923468589783, |
|
"eval_runtime": 110.101, |
|
"eval_samples_per_second": 5.677, |
|
"eval_steps_per_second": 1.426, |
|
"eval_wer": 0.43889016280669574, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 61.3424, |
|
"grad_norm": 0.37736016511917114, |
|
"learning_rate": 1.3692888888888892e-05, |
|
"loss": 0.0079, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 61.9824, |
|
"grad_norm": 2.7307522296905518, |
|
"learning_rate": 1.3604000000000002e-05, |
|
"loss": 0.008, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 62.6208, |
|
"grad_norm": 0.1229301169514656, |
|
"learning_rate": 1.3515111111111113e-05, |
|
"loss": 0.0065, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 63.2592, |
|
"grad_norm": 0.7501879930496216, |
|
"learning_rate": 1.3426222222222223e-05, |
|
"loss": 0.0074, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 63.8992, |
|
"grad_norm": 0.5861854553222656, |
|
"learning_rate": 1.3337333333333335e-05, |
|
"loss": 0.0073, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 63.8992, |
|
"eval_loss": 0.9372482895851135, |
|
"eval_runtime": 139.4248, |
|
"eval_samples_per_second": 4.483, |
|
"eval_steps_per_second": 1.126, |
|
"eval_wer": 0.3833983031414813, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 64.5376, |
|
"grad_norm": 1.7161897420883179, |
|
"learning_rate": 1.3248444444444446e-05, |
|
"loss": 0.0072, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 65.176, |
|
"grad_norm": 1.3181058168411255, |
|
"learning_rate": 1.3159555555555556e-05, |
|
"loss": 0.0064, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 65.816, |
|
"grad_norm": 0.576824426651001, |
|
"learning_rate": 1.3070666666666667e-05, |
|
"loss": 0.0073, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 66.4544, |
|
"grad_norm": 1.2488709688186646, |
|
"learning_rate": 1.298177777777778e-05, |
|
"loss": 0.0067, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 67.0928, |
|
"grad_norm": 0.8621447682380676, |
|
"learning_rate": 1.2892888888888891e-05, |
|
"loss": 0.0066, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 67.0928, |
|
"eval_loss": 0.9511934518814087, |
|
"eval_runtime": 138.3581, |
|
"eval_samples_per_second": 4.517, |
|
"eval_steps_per_second": 1.135, |
|
"eval_wer": 0.4038064664067874, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 67.7328, |
|
"grad_norm": 2.959468364715576, |
|
"learning_rate": 1.2804000000000001e-05, |
|
"loss": 0.0063, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 68.3712, |
|
"grad_norm": 4.359962463378906, |
|
"learning_rate": 1.2715111111111112e-05, |
|
"loss": 0.0069, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 69.0096, |
|
"grad_norm": 0.8827808499336243, |
|
"learning_rate": 1.2626222222222224e-05, |
|
"loss": 0.0067, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 69.6496, |
|
"grad_norm": 4.478594779968262, |
|
"learning_rate": 1.2537333333333334e-05, |
|
"loss": 0.0063, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 70.288, |
|
"grad_norm": 0.3562716245651245, |
|
"learning_rate": 1.2448444444444445e-05, |
|
"loss": 0.0056, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 70.288, |
|
"eval_loss": 0.9799156188964844, |
|
"eval_runtime": 108.661, |
|
"eval_samples_per_second": 5.752, |
|
"eval_steps_per_second": 1.445, |
|
"eval_wer": 0.4063288236642972, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 70.928, |
|
"grad_norm": 0.21852029860019684, |
|
"learning_rate": 1.2359555555555555e-05, |
|
"loss": 0.0058, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 71.5664, |
|
"grad_norm": 0.7513341307640076, |
|
"learning_rate": 1.227111111111111e-05, |
|
"loss": 0.0051, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 72.2048, |
|
"grad_norm": 1.729013204574585, |
|
"learning_rate": 1.2182222222222225e-05, |
|
"loss": 0.0048, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 72.8448, |
|
"grad_norm": 0.363949179649353, |
|
"learning_rate": 1.2093333333333335e-05, |
|
"loss": 0.0043, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 73.4832, |
|
"grad_norm": 0.09823896735906601, |
|
"learning_rate": 1.2004444444444445e-05, |
|
"loss": 0.0049, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 73.4832, |
|
"eval_loss": 0.9892968535423279, |
|
"eval_runtime": 127.4129, |
|
"eval_samples_per_second": 4.905, |
|
"eval_steps_per_second": 1.232, |
|
"eval_wer": 0.3845448291676221, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 74.1216, |
|
"grad_norm": 0.18766258656978607, |
|
"learning_rate": 1.1915555555555556e-05, |
|
"loss": 0.0061, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 74.7616, |
|
"grad_norm": 2.374441623687744, |
|
"learning_rate": 1.1827111111111111e-05, |
|
"loss": 0.0049, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 75.4, |
|
"grad_norm": 1.901435136795044, |
|
"learning_rate": 1.1738222222222223e-05, |
|
"loss": 0.0042, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 76.0384, |
|
"grad_norm": 1.8781205415725708, |
|
"learning_rate": 1.1649333333333336e-05, |
|
"loss": 0.0066, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 76.6784, |
|
"grad_norm": 4.333975791931152, |
|
"learning_rate": 1.1560444444444446e-05, |
|
"loss": 0.0046, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 76.6784, |
|
"eval_loss": 0.9896656274795532, |
|
"eval_runtime": 128.2479, |
|
"eval_samples_per_second": 4.873, |
|
"eval_steps_per_second": 1.224, |
|
"eval_wer": 0.38087594588397156, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 77.3168, |
|
"grad_norm": 1.9272364377975464, |
|
"learning_rate": 1.1471555555555556e-05, |
|
"loss": 0.0035, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 77.9568, |
|
"grad_norm": 0.6323594450950623, |
|
"learning_rate": 1.1382666666666669e-05, |
|
"loss": 0.0051, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 78.5952, |
|
"grad_norm": 1.1000163555145264, |
|
"learning_rate": 1.1293777777777779e-05, |
|
"loss": 0.0038, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 79.2336, |
|
"grad_norm": 0.48444664478302, |
|
"learning_rate": 1.120488888888889e-05, |
|
"loss": 0.0045, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 79.8736, |
|
"grad_norm": 0.6538843512535095, |
|
"learning_rate": 1.1116e-05, |
|
"loss": 0.0044, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 79.8736, |
|
"eval_loss": 0.9970444440841675, |
|
"eval_runtime": 131.0293, |
|
"eval_samples_per_second": 4.77, |
|
"eval_steps_per_second": 1.198, |
|
"eval_wer": 0.37491401054803947, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 80.512, |
|
"grad_norm": 3.5870893001556396, |
|
"learning_rate": 1.1027111111111112e-05, |
|
"loss": 0.0045, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 81.1504, |
|
"grad_norm": 0.1785198152065277, |
|
"learning_rate": 1.0938222222222223e-05, |
|
"loss": 0.0035, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 81.7904, |
|
"grad_norm": 0.870185375213623, |
|
"learning_rate": 1.0849333333333335e-05, |
|
"loss": 0.0039, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 82.4288, |
|
"grad_norm": 0.21784505248069763, |
|
"learning_rate": 1.0760444444444445e-05, |
|
"loss": 0.0041, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 83.0672, |
|
"grad_norm": 3.4108142852783203, |
|
"learning_rate": 1.0671555555555557e-05, |
|
"loss": 0.0039, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 83.0672, |
|
"eval_loss": 1.011253833770752, |
|
"eval_runtime": 125.5354, |
|
"eval_samples_per_second": 4.979, |
|
"eval_steps_per_second": 1.251, |
|
"eval_wer": 0.37606053657418026, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 83.7072, |
|
"grad_norm": 0.04159156233072281, |
|
"learning_rate": 1.0582666666666668e-05, |
|
"loss": 0.0031, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 84.3456, |
|
"grad_norm": 0.20573270320892334, |
|
"learning_rate": 1.0493777777777778e-05, |
|
"loss": 0.0043, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 84.9856, |
|
"grad_norm": 2.9891881942749023, |
|
"learning_rate": 1.0404888888888889e-05, |
|
"loss": 0.0041, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 85.624, |
|
"grad_norm": 2.5772018432617188, |
|
"learning_rate": 1.0316e-05, |
|
"loss": 0.0039, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 86.2624, |
|
"grad_norm": 0.12921689450740814, |
|
"learning_rate": 1.0227111111111111e-05, |
|
"loss": 0.0035, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 86.2624, |
|
"eval_loss": 1.0148670673370361, |
|
"eval_runtime": 126.4781, |
|
"eval_samples_per_second": 4.942, |
|
"eval_steps_per_second": 1.241, |
|
"eval_wer": 0.38316899793625314, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 86.9024, |
|
"grad_norm": 1.1538918018341064, |
|
"learning_rate": 1.0138222222222223e-05, |
|
"loss": 0.0041, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 87.5408, |
|
"grad_norm": 3.53802490234375, |
|
"learning_rate": 1.0049333333333334e-05, |
|
"loss": 0.0037, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 88.1792, |
|
"grad_norm": 0.03811612352728844, |
|
"learning_rate": 9.960444444444444e-06, |
|
"loss": 0.0035, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 88.8192, |
|
"grad_norm": 0.8571044206619263, |
|
"learning_rate": 9.871555555555557e-06, |
|
"loss": 0.0027, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 89.4576, |
|
"grad_norm": 1.1792042255401611, |
|
"learning_rate": 9.782666666666667e-06, |
|
"loss": 0.003, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 89.4576, |
|
"eval_loss": 1.0032455921173096, |
|
"eval_runtime": 152.9287, |
|
"eval_samples_per_second": 4.087, |
|
"eval_steps_per_second": 1.027, |
|
"eval_wer": 0.38592066039899103, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 90.096, |
|
"grad_norm": 0.32366737723350525, |
|
"learning_rate": 9.693777777777779e-06, |
|
"loss": 0.0034, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 90.736, |
|
"grad_norm": 0.4921131134033203, |
|
"learning_rate": 9.60488888888889e-06, |
|
"loss": 0.0035, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 91.3744, |
|
"grad_norm": 3.0860531330108643, |
|
"learning_rate": 9.516e-06, |
|
"loss": 0.0031, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 92.0128, |
|
"grad_norm": 3.8854892253875732, |
|
"learning_rate": 9.427111111111112e-06, |
|
"loss": 0.0034, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 92.6528, |
|
"grad_norm": 0.3374783396720886, |
|
"learning_rate": 9.338222222222223e-06, |
|
"loss": 0.0025, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 92.6528, |
|
"eval_loss": 1.0094186067581177, |
|
"eval_runtime": 135.2467, |
|
"eval_samples_per_second": 4.621, |
|
"eval_steps_per_second": 1.161, |
|
"eval_wer": 0.3856913551937629, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 93.2912, |
|
"grad_norm": 0.9511881470680237, |
|
"learning_rate": 9.249333333333335e-06, |
|
"loss": 0.0023, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 93.9312, |
|
"grad_norm": 0.5302645564079285, |
|
"learning_rate": 9.160888888888888e-06, |
|
"loss": 0.0025, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 94.5696, |
|
"grad_norm": 0.22430481016635895, |
|
"learning_rate": 9.072444444444445e-06, |
|
"loss": 0.003, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 95.208, |
|
"grad_norm": 0.18836861848831177, |
|
"learning_rate": 8.983555555555556e-06, |
|
"loss": 0.0025, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 95.848, |
|
"grad_norm": 0.5240734815597534, |
|
"learning_rate": 8.894666666666666e-06, |
|
"loss": 0.0034, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 95.848, |
|
"eval_loss": 1.02016282081604, |
|
"eval_runtime": 122.5101, |
|
"eval_samples_per_second": 5.102, |
|
"eval_steps_per_second": 1.282, |
|
"eval_wer": 0.3733088741114423, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 96.4864, |
|
"grad_norm": 0.10021472722291946, |
|
"learning_rate": 8.805777777777778e-06, |
|
"loss": 0.0029, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 97.1248, |
|
"grad_norm": 0.07506231218576431, |
|
"learning_rate": 8.716888888888889e-06, |
|
"loss": 0.0028, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 97.7648, |
|
"grad_norm": 0.6782508492469788, |
|
"learning_rate": 8.628000000000001e-06, |
|
"loss": 0.0031, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 98.4032, |
|
"grad_norm": 0.1539822518825531, |
|
"learning_rate": 8.539111111111112e-06, |
|
"loss": 0.0032, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 99.0416, |
|
"grad_norm": 0.3094239830970764, |
|
"learning_rate": 8.450222222222224e-06, |
|
"loss": 0.0027, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 99.0416, |
|
"eval_loss": 1.011278510093689, |
|
"eval_runtime": 106.3459, |
|
"eval_samples_per_second": 5.877, |
|
"eval_steps_per_second": 1.476, |
|
"eval_wer": 0.36551249713368494, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 99.6816, |
|
"grad_norm": 0.32241737842559814, |
|
"learning_rate": 8.361333333333334e-06, |
|
"loss": 0.0025, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 100.32, |
|
"grad_norm": 0.7617099285125732, |
|
"learning_rate": 8.272444444444445e-06, |
|
"loss": 0.0024, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 100.96, |
|
"grad_norm": 0.16760019958019257, |
|
"learning_rate": 8.183555555555555e-06, |
|
"loss": 0.0022, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 101.5984, |
|
"grad_norm": 0.05670633167028427, |
|
"learning_rate": 8.095111111111112e-06, |
|
"loss": 0.0029, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 102.2368, |
|
"grad_norm": 1.7071377038955688, |
|
"learning_rate": 8.006222222222223e-06, |
|
"loss": 0.0023, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 102.2368, |
|
"eval_loss": 1.0178241729736328, |
|
"eval_runtime": 104.5181, |
|
"eval_samples_per_second": 5.98, |
|
"eval_steps_per_second": 1.502, |
|
"eval_wer": 0.3767484521898647, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 102.8768, |
|
"grad_norm": 0.09089858084917068, |
|
"learning_rate": 7.917333333333333e-06, |
|
"loss": 0.0021, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 103.5152, |
|
"grad_norm": 0.31245774030685425, |
|
"learning_rate": 7.828444444444445e-06, |
|
"loss": 0.0027, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 104.1536, |
|
"grad_norm": 0.11016162484884262, |
|
"learning_rate": 7.739555555555556e-06, |
|
"loss": 0.0024, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 104.7936, |
|
"grad_norm": 1.1260924339294434, |
|
"learning_rate": 7.650666666666668e-06, |
|
"loss": 0.002, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 105.432, |
|
"grad_norm": 0.3586418330669403, |
|
"learning_rate": 7.561777777777778e-06, |
|
"loss": 0.002, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 105.432, |
|
"eval_loss": 1.0109715461730957, |
|
"eval_runtime": 110.8526, |
|
"eval_samples_per_second": 5.638, |
|
"eval_steps_per_second": 1.416, |
|
"eval_wer": 0.36711763357028204, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 106.0704, |
|
"grad_norm": 0.06334754824638367, |
|
"learning_rate": 7.4728888888888895e-06, |
|
"loss": 0.0021, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 106.7104, |
|
"grad_norm": 0.018475733697414398, |
|
"learning_rate": 7.384e-06, |
|
"loss": 0.002, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 107.3488, |
|
"grad_norm": 0.0939849466085434, |
|
"learning_rate": 7.295111111111112e-06, |
|
"loss": 0.0018, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 107.9888, |
|
"grad_norm": 0.6405438780784607, |
|
"learning_rate": 7.206222222222223e-06, |
|
"loss": 0.0025, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 108.6272, |
|
"grad_norm": 2.7534918785095215, |
|
"learning_rate": 7.117333333333334e-06, |
|
"loss": 0.002, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 108.6272, |
|
"eval_loss": 1.0300931930541992, |
|
"eval_runtime": 115.653, |
|
"eval_samples_per_second": 5.404, |
|
"eval_steps_per_second": 1.358, |
|
"eval_wer": 0.3733088741114423, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 109.2656, |
|
"grad_norm": 0.15731623768806458, |
|
"learning_rate": 7.028444444444445e-06, |
|
"loss": 0.0018, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 109.9056, |
|
"grad_norm": 1.9066952466964722, |
|
"learning_rate": 6.9395555555555565e-06, |
|
"loss": 0.0025, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 110.544, |
|
"grad_norm": 0.7084272503852844, |
|
"learning_rate": 6.850666666666668e-06, |
|
"loss": 0.002, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 111.1824, |
|
"grad_norm": 0.11165124177932739, |
|
"learning_rate": 6.761777777777778e-06, |
|
"loss": 0.0017, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 111.8224, |
|
"grad_norm": 0.16031847894191742, |
|
"learning_rate": 6.6733333333333335e-06, |
|
"loss": 0.0018, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 111.8224, |
|
"eval_loss": 1.047860026359558, |
|
"eval_runtime": 128.5964, |
|
"eval_samples_per_second": 4.86, |
|
"eval_steps_per_second": 1.221, |
|
"eval_wer": 0.3850034395780784, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 112.4608, |
|
"grad_norm": 0.2045450359582901, |
|
"learning_rate": 6.584444444444446e-06, |
|
"loss": 0.0023, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 113.0992, |
|
"grad_norm": 0.3708573281764984, |
|
"learning_rate": 6.495555555555556e-06, |
|
"loss": 0.002, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 113.7392, |
|
"grad_norm": 0.015015633776783943, |
|
"learning_rate": 6.4066666666666674e-06, |
|
"loss": 0.002, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 114.3776, |
|
"grad_norm": 0.04302428662776947, |
|
"learning_rate": 6.317777777777778e-06, |
|
"loss": 0.002, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 115.016, |
|
"grad_norm": 0.14831684529781342, |
|
"learning_rate": 6.22888888888889e-06, |
|
"loss": 0.002, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 115.016, |
|
"eval_loss": 1.0215903520584106, |
|
"eval_runtime": 125.0936, |
|
"eval_samples_per_second": 4.996, |
|
"eval_steps_per_second": 1.255, |
|
"eval_wer": 0.36253152946571887, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 115.656, |
|
"grad_norm": 0.06726387143135071, |
|
"learning_rate": 6.1400000000000005e-06, |
|
"loss": 0.0017, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 116.2944, |
|
"grad_norm": 0.059277065098285675, |
|
"learning_rate": 6.051111111111112e-06, |
|
"loss": 0.0016, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 116.9344, |
|
"grad_norm": 1.2333521842956543, |
|
"learning_rate": 5.962222222222222e-06, |
|
"loss": 0.0015, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 117.5728, |
|
"grad_norm": 0.1064881980419159, |
|
"learning_rate": 5.873333333333334e-06, |
|
"loss": 0.0013, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 118.2112, |
|
"grad_norm": 0.025924814864993095, |
|
"learning_rate": 5.784444444444445e-06, |
|
"loss": 0.002, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 118.2112, |
|
"eval_loss": 1.040561556816101, |
|
"eval_runtime": 127.336, |
|
"eval_samples_per_second": 4.908, |
|
"eval_steps_per_second": 1.233, |
|
"eval_wer": 0.36872277000687914, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 118.8512, |
|
"grad_norm": 0.05642084404826164, |
|
"learning_rate": 5.695555555555556e-06, |
|
"loss": 0.0017, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 119.4896, |
|
"grad_norm": 0.045298777520656586, |
|
"learning_rate": 5.606666666666667e-06, |
|
"loss": 0.0013, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 120.128, |
|
"grad_norm": 0.4968818724155426, |
|
"learning_rate": 5.517777777777779e-06, |
|
"loss": 0.0018, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 120.768, |
|
"grad_norm": 0.10297541320323944, |
|
"learning_rate": 5.428888888888889e-06, |
|
"loss": 0.0013, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 121.4064, |
|
"grad_norm": 0.022102700546383858, |
|
"learning_rate": 5.3400000000000005e-06, |
|
"loss": 0.0011, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 121.4064, |
|
"eval_loss": 1.0520411729812622, |
|
"eval_runtime": 132.307, |
|
"eval_samples_per_second": 4.724, |
|
"eval_steps_per_second": 1.187, |
|
"eval_wer": 0.41297867461591375, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 122.0448, |
|
"grad_norm": 0.13088802993297577, |
|
"learning_rate": 5.251555555555557e-06, |
|
"loss": 0.0018, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 122.6848, |
|
"grad_norm": 0.1453741490840912, |
|
"learning_rate": 5.162666666666667e-06, |
|
"loss": 0.0016, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 123.3232, |
|
"grad_norm": 0.0916297510266304, |
|
"learning_rate": 5.0737777777777785e-06, |
|
"loss": 0.0014, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 123.9632, |
|
"grad_norm": 3.053272008895874, |
|
"learning_rate": 4.984888888888889e-06, |
|
"loss": 0.0014, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 124.6016, |
|
"grad_norm": 0.06683260202407837, |
|
"learning_rate": 4.896e-06, |
|
"loss": 0.0014, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 124.6016, |
|
"eval_loss": 1.0563929080963135, |
|
"eval_runtime": 129.9946, |
|
"eval_samples_per_second": 4.808, |
|
"eval_steps_per_second": 1.208, |
|
"eval_wer": 0.36620041274936943, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 125.24, |
|
"grad_norm": 0.12738560140132904, |
|
"learning_rate": 4.8071111111111115e-06, |
|
"loss": 0.001, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 125.88, |
|
"grad_norm": 0.2464442402124405, |
|
"learning_rate": 4.718222222222222e-06, |
|
"loss": 0.0016, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 126.5184, |
|
"grad_norm": 0.026401793584227562, |
|
"learning_rate": 4.629333333333333e-06, |
|
"loss": 0.0015, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 127.1568, |
|
"grad_norm": 0.2598608732223511, |
|
"learning_rate": 4.540444444444445e-06, |
|
"loss": 0.0017, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 127.7968, |
|
"grad_norm": 0.11029820889234543, |
|
"learning_rate": 4.451555555555556e-06, |
|
"loss": 0.0009, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 127.7968, |
|
"eval_loss": 1.0525336265563965, |
|
"eval_runtime": 130.5048, |
|
"eval_samples_per_second": 4.789, |
|
"eval_steps_per_second": 1.203, |
|
"eval_wer": 0.37881219903691815, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 128.4352, |
|
"grad_norm": 0.03331665322184563, |
|
"learning_rate": 4.362666666666667e-06, |
|
"loss": 0.001, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 129.0736, |
|
"grad_norm": 0.12977726757526398, |
|
"learning_rate": 4.2737777777777785e-06, |
|
"loss": 0.0015, |
|
"step": 40400 |
|
}, |
|
{ |
|
"epoch": 129.7136, |
|
"grad_norm": 0.031082535162568092, |
|
"learning_rate": 4.18488888888889e-06, |
|
"loss": 0.0011, |
|
"step": 40600 |
|
}, |
|
{ |
|
"epoch": 130.352, |
|
"grad_norm": 2.1375820636749268, |
|
"learning_rate": 4.096e-06, |
|
"loss": 0.001, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 130.992, |
|
"grad_norm": 1.0629844665527344, |
|
"learning_rate": 4.0071111111111116e-06, |
|
"loss": 0.0009, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 130.992, |
|
"eval_loss": 1.0391464233398438, |
|
"eval_runtime": 131.1426, |
|
"eval_samples_per_second": 4.766, |
|
"eval_steps_per_second": 1.197, |
|
"eval_wer": 0.359779867002981, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 131.6304, |
|
"grad_norm": 0.0641772672533989, |
|
"learning_rate": 3.918222222222223e-06, |
|
"loss": 0.0012, |
|
"step": 41200 |
|
}, |
|
{ |
|
"epoch": 132.2688, |
|
"grad_norm": 0.018780462443828583, |
|
"learning_rate": 3.829333333333334e-06, |
|
"loss": 0.0012, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 132.9088, |
|
"grad_norm": 0.021932663396000862, |
|
"learning_rate": 3.7408888888888895e-06, |
|
"loss": 0.0013, |
|
"step": 41600 |
|
}, |
|
{ |
|
"epoch": 133.5472, |
|
"grad_norm": 0.40411534905433655, |
|
"learning_rate": 3.6520000000000004e-06, |
|
"loss": 0.0013, |
|
"step": 41800 |
|
}, |
|
{ |
|
"epoch": 134.1856, |
|
"grad_norm": 2.085688352584839, |
|
"learning_rate": 3.5631111111111117e-06, |
|
"loss": 0.0011, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 134.1856, |
|
"eval_loss": 1.04826819896698, |
|
"eval_runtime": 125.1858, |
|
"eval_samples_per_second": 4.993, |
|
"eval_steps_per_second": 1.254, |
|
"eval_wer": 0.3762898417794084, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 134.8256, |
|
"grad_norm": 0.1101042777299881, |
|
"learning_rate": 3.4742222222222225e-06, |
|
"loss": 0.001, |
|
"step": 42200 |
|
}, |
|
{ |
|
"epoch": 135.464, |
|
"grad_norm": 0.5308594107627869, |
|
"learning_rate": 3.385333333333334e-06, |
|
"loss": 0.0012, |
|
"step": 42400 |
|
}, |
|
{ |
|
"epoch": 136.1024, |
|
"grad_norm": 0.10180327296257019, |
|
"learning_rate": 3.2964444444444447e-06, |
|
"loss": 0.001, |
|
"step": 42600 |
|
}, |
|
{ |
|
"epoch": 136.7424, |
|
"grad_norm": 0.029545770958065987, |
|
"learning_rate": 3.207555555555556e-06, |
|
"loss": 0.0009, |
|
"step": 42800 |
|
}, |
|
{ |
|
"epoch": 137.3808, |
|
"grad_norm": 0.022081894800066948, |
|
"learning_rate": 3.118666666666667e-06, |
|
"loss": 0.001, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 137.3808, |
|
"eval_loss": 1.0376399755477905, |
|
"eval_runtime": 129.9317, |
|
"eval_samples_per_second": 4.81, |
|
"eval_steps_per_second": 1.208, |
|
"eval_wer": 0.4168768631047925, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 138.0192, |
|
"grad_norm": 0.7658660411834717, |
|
"learning_rate": 3.029777777777778e-06, |
|
"loss": 0.0011, |
|
"step": 43200 |
|
}, |
|
{ |
|
"epoch": 138.6592, |
|
"grad_norm": 0.027192605659365654, |
|
"learning_rate": 2.940888888888889e-06, |
|
"loss": 0.0011, |
|
"step": 43400 |
|
}, |
|
{ |
|
"epoch": 139.2976, |
|
"grad_norm": 0.03390691056847572, |
|
"learning_rate": 2.8520000000000004e-06, |
|
"loss": 0.001, |
|
"step": 43600 |
|
}, |
|
{ |
|
"epoch": 139.9376, |
|
"grad_norm": 0.08181264251470566, |
|
"learning_rate": 2.7631111111111113e-06, |
|
"loss": 0.0011, |
|
"step": 43800 |
|
}, |
|
{ |
|
"epoch": 140.576, |
|
"grad_norm": 0.3259316086769104, |
|
"learning_rate": 2.6742222222222226e-06, |
|
"loss": 0.0008, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 140.576, |
|
"eval_loss": 1.0424144268035889, |
|
"eval_runtime": 104.1123, |
|
"eval_samples_per_second": 6.003, |
|
"eval_steps_per_second": 1.508, |
|
"eval_wer": 0.36321944508140336, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 141.2144, |
|
"grad_norm": 0.02337617427110672, |
|
"learning_rate": 2.5853333333333335e-06, |
|
"loss": 0.0005, |
|
"step": 44200 |
|
}, |
|
{ |
|
"epoch": 141.8544, |
|
"grad_norm": 0.01961325667798519, |
|
"learning_rate": 2.4964444444444448e-06, |
|
"loss": 0.001, |
|
"step": 44400 |
|
}, |
|
{ |
|
"epoch": 142.4928, |
|
"grad_norm": 0.0774613469839096, |
|
"learning_rate": 2.4075555555555556e-06, |
|
"loss": 0.0008, |
|
"step": 44600 |
|
}, |
|
{ |
|
"epoch": 143.1312, |
|
"grad_norm": 0.4658966660499573, |
|
"learning_rate": 2.318666666666667e-06, |
|
"loss": 0.0007, |
|
"step": 44800 |
|
}, |
|
{ |
|
"epoch": 143.7712, |
|
"grad_norm": 0.39419323205947876, |
|
"learning_rate": 2.229777777777778e-06, |
|
"loss": 0.0007, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 143.7712, |
|
"eval_loss": 1.0494757890701294, |
|
"eval_runtime": 104.6831, |
|
"eval_samples_per_second": 5.97, |
|
"eval_steps_per_second": 1.5, |
|
"eval_wer": 0.4159596422838798, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 144.4096, |
|
"grad_norm": 1.093592882156372, |
|
"learning_rate": 2.140888888888889e-06, |
|
"loss": 0.0008, |
|
"step": 45200 |
|
}, |
|
{ |
|
"epoch": 145.048, |
|
"grad_norm": 0.011096829548478127, |
|
"learning_rate": 2.052e-06, |
|
"loss": 0.0008, |
|
"step": 45400 |
|
}, |
|
{ |
|
"epoch": 145.688, |
|
"grad_norm": 0.18569226562976837, |
|
"learning_rate": 1.9631111111111113e-06, |
|
"loss": 0.0009, |
|
"step": 45600 |
|
}, |
|
{ |
|
"epoch": 146.3264, |
|
"grad_norm": 0.17811138927936554, |
|
"learning_rate": 1.8742222222222222e-06, |
|
"loss": 0.0008, |
|
"step": 45800 |
|
}, |
|
{ |
|
"epoch": 146.9664, |
|
"grad_norm": 0.01262740883976221, |
|
"learning_rate": 1.7853333333333333e-06, |
|
"loss": 0.0009, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 146.9664, |
|
"eval_loss": 1.054679274559021, |
|
"eval_runtime": 104.7175, |
|
"eval_samples_per_second": 5.968, |
|
"eval_steps_per_second": 1.499, |
|
"eval_wer": 0.36872277000687914, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 147.6048, |
|
"grad_norm": 7.735331058502197, |
|
"learning_rate": 1.6964444444444444e-06, |
|
"loss": 0.0008, |
|
"step": 46200 |
|
}, |
|
{ |
|
"epoch": 148.2432, |
|
"grad_norm": 0.2187137007713318, |
|
"learning_rate": 1.608e-06, |
|
"loss": 0.0009, |
|
"step": 46400 |
|
}, |
|
{ |
|
"epoch": 148.8832, |
|
"grad_norm": 0.15815149247646332, |
|
"learning_rate": 1.5191111111111112e-06, |
|
"loss": 0.0008, |
|
"step": 46600 |
|
}, |
|
{ |
|
"epoch": 149.5216, |
|
"grad_norm": 0.3170558214187622, |
|
"learning_rate": 1.430666666666667e-06, |
|
"loss": 0.0005, |
|
"step": 46800 |
|
}, |
|
{ |
|
"epoch": 150.16, |
|
"grad_norm": 0.09482572972774506, |
|
"learning_rate": 1.341777777777778e-06, |
|
"loss": 0.0005, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 150.16, |
|
"eval_loss": 1.058286428451538, |
|
"eval_runtime": 103.6331, |
|
"eval_samples_per_second": 6.031, |
|
"eval_steps_per_second": 1.515, |
|
"eval_wer": 0.3689520752121073, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 150.8, |
|
"grad_norm": 0.012617244385182858, |
|
"learning_rate": 1.2528888888888891e-06, |
|
"loss": 0.0006, |
|
"step": 47200 |
|
}, |
|
{ |
|
"epoch": 151.4384, |
|
"grad_norm": 0.032891806215047836, |
|
"learning_rate": 1.1640000000000002e-06, |
|
"loss": 0.0009, |
|
"step": 47400 |
|
}, |
|
{ |
|
"epoch": 152.0768, |
|
"grad_norm": 1.6969341039657593, |
|
"learning_rate": 1.0751111111111113e-06, |
|
"loss": 0.0006, |
|
"step": 47600 |
|
}, |
|
{ |
|
"epoch": 152.7168, |
|
"grad_norm": 0.2790795862674713, |
|
"learning_rate": 9.862222222222224e-07, |
|
"loss": 0.0006, |
|
"step": 47800 |
|
}, |
|
{ |
|
"epoch": 153.3552, |
|
"grad_norm": 0.33090606331825256, |
|
"learning_rate": 8.973333333333334e-07, |
|
"loss": 0.0007, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 153.3552, |
|
"eval_loss": 1.0462735891342163, |
|
"eval_runtime": 105.9059, |
|
"eval_samples_per_second": 5.901, |
|
"eval_steps_per_second": 1.482, |
|
"eval_wer": 0.4122907590002293, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 153.9952, |
|
"grad_norm": 0.005133169237524271, |
|
"learning_rate": 8.084444444444445e-07, |
|
"loss": 0.0008, |
|
"step": 48200 |
|
}, |
|
{ |
|
"epoch": 154.6336, |
|
"grad_norm": 0.020690323784947395, |
|
"learning_rate": 7.195555555555556e-07, |
|
"loss": 0.001, |
|
"step": 48400 |
|
}, |
|
{ |
|
"epoch": 155.272, |
|
"grad_norm": 2.341998338699341, |
|
"learning_rate": 6.306666666666666e-07, |
|
"loss": 0.0007, |
|
"step": 48600 |
|
}, |
|
{ |
|
"epoch": 155.912, |
|
"grad_norm": 0.0469956174492836, |
|
"learning_rate": 5.417777777777778e-07, |
|
"loss": 0.0006, |
|
"step": 48800 |
|
}, |
|
{ |
|
"epoch": 156.5504, |
|
"grad_norm": 0.006763980723917484, |
|
"learning_rate": 4.528888888888889e-07, |
|
"loss": 0.0007, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 156.5504, |
|
"eval_loss": 1.0463974475860596, |
|
"eval_runtime": 104.646, |
|
"eval_samples_per_second": 5.973, |
|
"eval_steps_per_second": 1.5, |
|
"eval_wer": 0.3600091722082091, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 157.1888, |
|
"grad_norm": 0.01508264522999525, |
|
"learning_rate": 3.6400000000000003e-07, |
|
"loss": 0.0008, |
|
"step": 49200 |
|
}, |
|
{ |
|
"epoch": 157.8288, |
|
"grad_norm": 0.008547438308596611, |
|
"learning_rate": 2.751111111111111e-07, |
|
"loss": 0.0013, |
|
"step": 49400 |
|
}, |
|
{ |
|
"epoch": 158.4672, |
|
"grad_norm": 0.03822452574968338, |
|
"learning_rate": 1.8622222222222221e-07, |
|
"loss": 0.0005, |
|
"step": 49600 |
|
}, |
|
{ |
|
"epoch": 159.1056, |
|
"grad_norm": 0.07067080587148666, |
|
"learning_rate": 9.733333333333334e-08, |
|
"loss": 0.0006, |
|
"step": 49800 |
|
}, |
|
{ |
|
"epoch": 159.7456, |
|
"grad_norm": 0.007019846700131893, |
|
"learning_rate": 8.444444444444445e-09, |
|
"loss": 0.0005, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 159.7456, |
|
"eval_loss": 1.0454859733581543, |
|
"eval_runtime": 104.5679, |
|
"eval_samples_per_second": 5.977, |
|
"eval_steps_per_second": 1.501, |
|
"eval_wer": 0.3609263930291218, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 159.7456, |
|
"step": 50000, |
|
"total_flos": 1.966379287412736e+19, |
|
"train_loss": 0.0770821487186849, |
|
"train_runtime": 117170.0508, |
|
"train_samples_per_second": 6.828, |
|
"train_steps_per_second": 0.427 |
|
} |
|
], |
|
"logging_steps": 200, |
|
"max_steps": 50000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 161, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.966379287412736e+19, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|