|
{ |
|
"best_metric": 17.39402275853445, |
|
"best_model_checkpoint": "whisper-small-german-retrain/checkpoint-3000", |
|
"epoch": 130.43478260869566, |
|
"eval_steps": 1000, |
|
"global_step": 3000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 2.1739130434782608, |
|
"grad_norm": 2.7223403453826904, |
|
"learning_rate": 1.75e-06, |
|
"loss": 0.6229, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 4.3478260869565215, |
|
"grad_norm": 2.2247281074523926, |
|
"learning_rate": 3.5e-06, |
|
"loss": 0.3885, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 6.521739130434782, |
|
"grad_norm": 1.4519989490509033, |
|
"learning_rate": 5.25e-06, |
|
"loss": 0.2281, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 8.695652173913043, |
|
"grad_norm": 1.1439716815948486, |
|
"learning_rate": 7e-06, |
|
"loss": 0.1268, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 10.869565217391305, |
|
"grad_norm": 0.7530131340026855, |
|
"learning_rate": 8.75e-06, |
|
"loss": 0.0546, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 13.043478260869565, |
|
"grad_norm": 1.6841254234313965, |
|
"learning_rate": 1.05e-05, |
|
"loss": 0.0209, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 15.217391304347826, |
|
"grad_norm": 0.25525057315826416, |
|
"learning_rate": 1.2249999999999998e-05, |
|
"loss": 0.0098, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 17.391304347826086, |
|
"grad_norm": 0.227842316031456, |
|
"learning_rate": 1.4e-05, |
|
"loss": 0.0065, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 19.565217391304348, |
|
"grad_norm": 0.9949818253517151, |
|
"learning_rate": 1.575e-05, |
|
"loss": 0.0078, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 21.73913043478261, |
|
"grad_norm": 0.39849549531936646, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.0078, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 23.91304347826087, |
|
"grad_norm": 0.8715730905532837, |
|
"learning_rate": 1.7305555555555553e-05, |
|
"loss": 0.0071, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 26.08695652173913, |
|
"grad_norm": 0.20837093889713287, |
|
"learning_rate": 1.711111111111111e-05, |
|
"loss": 0.0049, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 28.26086956521739, |
|
"grad_norm": 0.10564578324556351, |
|
"learning_rate": 1.6916666666666667e-05, |
|
"loss": 0.0036, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 30.434782608695652, |
|
"grad_norm": 0.2511199712753296, |
|
"learning_rate": 1.6722222222222222e-05, |
|
"loss": 0.0029, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 32.608695652173914, |
|
"grad_norm": 0.05474904552102089, |
|
"learning_rate": 1.6527777777777777e-05, |
|
"loss": 0.0016, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 34.78260869565217, |
|
"grad_norm": 0.0346197709441185, |
|
"learning_rate": 1.633333333333333e-05, |
|
"loss": 0.0014, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 36.95652173913044, |
|
"grad_norm": 0.022180059924721718, |
|
"learning_rate": 1.613888888888889e-05, |
|
"loss": 0.0017, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 39.130434782608695, |
|
"grad_norm": 0.011027761735022068, |
|
"learning_rate": 1.594444444444444e-05, |
|
"loss": 0.0009, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 41.30434782608695, |
|
"grad_norm": 0.010779116302728653, |
|
"learning_rate": 1.575e-05, |
|
"loss": 0.0007, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 43.47826086956522, |
|
"grad_norm": 0.008251098915934563, |
|
"learning_rate": 1.5555555555555555e-05, |
|
"loss": 0.0006, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 43.47826086956522, |
|
"eval_loss": 0.5149791240692139, |
|
"eval_runtime": 40.8401, |
|
"eval_samples_per_second": 7.027, |
|
"eval_steps_per_second": 0.22, |
|
"eval_wer": 17.994247842941103, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 45.65217391304348, |
|
"grad_norm": 0.007410943973809481, |
|
"learning_rate": 1.536111111111111e-05, |
|
"loss": 0.0006, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 47.82608695652174, |
|
"grad_norm": 0.006867844145745039, |
|
"learning_rate": 1.5166666666666667e-05, |
|
"loss": 0.0005, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"grad_norm": 0.00803290493786335, |
|
"learning_rate": 1.497222222222222e-05, |
|
"loss": 0.0005, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 52.17391304347826, |
|
"grad_norm": 0.005789933726191521, |
|
"learning_rate": 1.4777777777777777e-05, |
|
"loss": 0.0005, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 54.34782608695652, |
|
"grad_norm": 0.005295329727232456, |
|
"learning_rate": 1.4583333333333333e-05, |
|
"loss": 0.0004, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 56.52173913043478, |
|
"grad_norm": 0.004821299575269222, |
|
"learning_rate": 1.4388888888888886e-05, |
|
"loss": 0.0004, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 58.69565217391305, |
|
"grad_norm": 0.004488619975745678, |
|
"learning_rate": 1.4194444444444443e-05, |
|
"loss": 0.0004, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 60.869565217391305, |
|
"grad_norm": 0.0040580639615654945, |
|
"learning_rate": 1.4e-05, |
|
"loss": 0.0004, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 63.04347826086956, |
|
"grad_norm": 0.004379584453999996, |
|
"learning_rate": 1.3805555555555553e-05, |
|
"loss": 0.0003, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 65.21739130434783, |
|
"grad_norm": 0.003948231227695942, |
|
"learning_rate": 1.361111111111111e-05, |
|
"loss": 0.0003, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 67.3913043478261, |
|
"grad_norm": 0.0037093530409038067, |
|
"learning_rate": 1.3416666666666666e-05, |
|
"loss": 0.0003, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 69.56521739130434, |
|
"grad_norm": 0.0037487975787371397, |
|
"learning_rate": 1.3222222222222221e-05, |
|
"loss": 0.0003, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 71.73913043478261, |
|
"grad_norm": 0.003608859609812498, |
|
"learning_rate": 1.3027777777777776e-05, |
|
"loss": 0.0003, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 73.91304347826087, |
|
"grad_norm": 0.0034225275740027428, |
|
"learning_rate": 1.2833333333333331e-05, |
|
"loss": 0.0003, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 76.08695652173913, |
|
"grad_norm": 0.003319317242130637, |
|
"learning_rate": 1.2638888888888888e-05, |
|
"loss": 0.0003, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 78.26086956521739, |
|
"grad_norm": 0.003253102768212557, |
|
"learning_rate": 1.2444444444444445e-05, |
|
"loss": 0.0002, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 80.43478260869566, |
|
"grad_norm": 0.003029271960258484, |
|
"learning_rate": 1.2249999999999998e-05, |
|
"loss": 0.0002, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 82.6086956521739, |
|
"grad_norm": 0.002931032096967101, |
|
"learning_rate": 1.2055555555555555e-05, |
|
"loss": 0.0002, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 84.78260869565217, |
|
"grad_norm": 0.002701626857742667, |
|
"learning_rate": 1.1861111111111111e-05, |
|
"loss": 0.0002, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 86.95652173913044, |
|
"grad_norm": 0.002894147066399455, |
|
"learning_rate": 1.1666666666666665e-05, |
|
"loss": 0.0002, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 86.95652173913044, |
|
"eval_loss": 0.5610591173171997, |
|
"eval_runtime": 40.2761, |
|
"eval_samples_per_second": 7.126, |
|
"eval_steps_per_second": 0.223, |
|
"eval_wer": 17.456546204826807, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 89.1304347826087, |
|
"grad_norm": 0.0025357184931635857, |
|
"learning_rate": 1.1472222222222221e-05, |
|
"loss": 0.0002, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 91.30434782608695, |
|
"grad_norm": 0.0023813284933567047, |
|
"learning_rate": 1.1277777777777778e-05, |
|
"loss": 0.0002, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 93.47826086956522, |
|
"grad_norm": 0.002499406924471259, |
|
"learning_rate": 1.1083333333333331e-05, |
|
"loss": 0.0002, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 95.65217391304348, |
|
"grad_norm": 0.002111413050442934, |
|
"learning_rate": 1.0888888888888888e-05, |
|
"loss": 0.0002, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 97.82608695652173, |
|
"grad_norm": 0.0020701128523796797, |
|
"learning_rate": 1.0694444444444444e-05, |
|
"loss": 0.0002, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"grad_norm": 0.0028157017659395933, |
|
"learning_rate": 1.05e-05, |
|
"loss": 0.0002, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 102.17391304347827, |
|
"grad_norm": 0.0020910503808408976, |
|
"learning_rate": 1.0305555555555554e-05, |
|
"loss": 0.0002, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 104.34782608695652, |
|
"grad_norm": 0.0019400820601731539, |
|
"learning_rate": 1.011111111111111e-05, |
|
"loss": 0.0002, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 106.52173913043478, |
|
"grad_norm": 0.0016938770422711968, |
|
"learning_rate": 9.916666666666666e-06, |
|
"loss": 0.0002, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 108.69565217391305, |
|
"grad_norm": 0.0018923500319942832, |
|
"learning_rate": 9.722222222222221e-06, |
|
"loss": 0.0002, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 110.8695652173913, |
|
"grad_norm": 0.0017991146305575967, |
|
"learning_rate": 9.527777777777776e-06, |
|
"loss": 0.0001, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 113.04347826086956, |
|
"grad_norm": 0.0018069138750433922, |
|
"learning_rate": 9.333333333333333e-06, |
|
"loss": 0.0001, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 115.21739130434783, |
|
"grad_norm": 0.0017327632522210479, |
|
"learning_rate": 9.13888888888889e-06, |
|
"loss": 0.0001, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 117.3913043478261, |
|
"grad_norm": 0.0015717543428763747, |
|
"learning_rate": 8.944444444444443e-06, |
|
"loss": 0.0001, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 119.56521739130434, |
|
"grad_norm": 0.001588036073371768, |
|
"learning_rate": 8.75e-06, |
|
"loss": 0.0001, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 121.73913043478261, |
|
"grad_norm": 0.0015617014141753316, |
|
"learning_rate": 8.555555555555554e-06, |
|
"loss": 0.0001, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 123.91304347826087, |
|
"grad_norm": 0.0015488613862544298, |
|
"learning_rate": 8.361111111111111e-06, |
|
"loss": 0.0001, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 126.08695652173913, |
|
"grad_norm": 0.0016123673412948847, |
|
"learning_rate": 8.166666666666666e-06, |
|
"loss": 0.0001, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 128.2608695652174, |
|
"grad_norm": 0.0014842275995761156, |
|
"learning_rate": 7.97222222222222e-06, |
|
"loss": 0.0001, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 130.43478260869566, |
|
"grad_norm": 0.001505482941865921, |
|
"learning_rate": 7.777777777777777e-06, |
|
"loss": 0.0001, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 130.43478260869566, |
|
"eval_loss": 0.5873442888259888, |
|
"eval_runtime": 40.5248, |
|
"eval_samples_per_second": 7.082, |
|
"eval_steps_per_second": 0.222, |
|
"eval_wer": 17.39402275853445, |
|
"step": 3000 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 5000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 218, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5.43204299022336e+19, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|