{
"best_metric": 17.39402275853445,
"best_model_checkpoint": "whisper-small-german-retrain/checkpoint-3000",
"epoch": 130.43478260869566,
"eval_steps": 1000,
"global_step": 3000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 2.1739130434782608,
"grad_norm": 2.7223403453826904,
"learning_rate": 1.75e-06,
"loss": 0.6229,
"step": 50
},
{
"epoch": 4.3478260869565215,
"grad_norm": 2.2247281074523926,
"learning_rate": 3.5e-06,
"loss": 0.3885,
"step": 100
},
{
"epoch": 6.521739130434782,
"grad_norm": 1.4519989490509033,
"learning_rate": 5.25e-06,
"loss": 0.2281,
"step": 150
},
{
"epoch": 8.695652173913043,
"grad_norm": 1.1439716815948486,
"learning_rate": 7e-06,
"loss": 0.1268,
"step": 200
},
{
"epoch": 10.869565217391305,
"grad_norm": 0.7530131340026855,
"learning_rate": 8.75e-06,
"loss": 0.0546,
"step": 250
},
{
"epoch": 13.043478260869565,
"grad_norm": 1.6841254234313965,
"learning_rate": 1.05e-05,
"loss": 0.0209,
"step": 300
},
{
"epoch": 15.217391304347826,
"grad_norm": 0.25525057315826416,
"learning_rate": 1.2249999999999998e-05,
"loss": 0.0098,
"step": 350
},
{
"epoch": 17.391304347826086,
"grad_norm": 0.227842316031456,
"learning_rate": 1.4e-05,
"loss": 0.0065,
"step": 400
},
{
"epoch": 19.565217391304348,
"grad_norm": 0.9949818253517151,
"learning_rate": 1.575e-05,
"loss": 0.0078,
"step": 450
},
{
"epoch": 21.73913043478261,
"grad_norm": 0.39849549531936646,
"learning_rate": 1.75e-05,
"loss": 0.0078,
"step": 500
},
{
"epoch": 23.91304347826087,
"grad_norm": 0.8715730905532837,
"learning_rate": 1.7305555555555553e-05,
"loss": 0.0071,
"step": 550
},
{
"epoch": 26.08695652173913,
"grad_norm": 0.20837093889713287,
"learning_rate": 1.711111111111111e-05,
"loss": 0.0049,
"step": 600
},
{
"epoch": 28.26086956521739,
"grad_norm": 0.10564578324556351,
"learning_rate": 1.6916666666666667e-05,
"loss": 0.0036,
"step": 650
},
{
"epoch": 30.434782608695652,
"grad_norm": 0.2511199712753296,
"learning_rate": 1.6722222222222222e-05,
"loss": 0.0029,
"step": 700
},
{
"epoch": 32.608695652173914,
"grad_norm": 0.05474904552102089,
"learning_rate": 1.6527777777777777e-05,
"loss": 0.0016,
"step": 750
},
{
"epoch": 34.78260869565217,
"grad_norm": 0.0346197709441185,
"learning_rate": 1.633333333333333e-05,
"loss": 0.0014,
"step": 800
},
{
"epoch": 36.95652173913044,
"grad_norm": 0.022180059924721718,
"learning_rate": 1.613888888888889e-05,
"loss": 0.0017,
"step": 850
},
{
"epoch": 39.130434782608695,
"grad_norm": 0.011027761735022068,
"learning_rate": 1.594444444444444e-05,
"loss": 0.0009,
"step": 900
},
{
"epoch": 41.30434782608695,
"grad_norm": 0.010779116302728653,
"learning_rate": 1.575e-05,
"loss": 0.0007,
"step": 950
},
{
"epoch": 43.47826086956522,
"grad_norm": 0.008251098915934563,
"learning_rate": 1.5555555555555555e-05,
"loss": 0.0006,
"step": 1000
},
{
"epoch": 43.47826086956522,
"eval_loss": 0.5149791240692139,
"eval_runtime": 40.8401,
"eval_samples_per_second": 7.027,
"eval_steps_per_second": 0.22,
"eval_wer": 17.994247842941103,
"step": 1000
},
{
"epoch": 45.65217391304348,
"grad_norm": 0.007410943973809481,
"learning_rate": 1.536111111111111e-05,
"loss": 0.0006,
"step": 1050
},
{
"epoch": 47.82608695652174,
"grad_norm": 0.006867844145745039,
"learning_rate": 1.5166666666666667e-05,
"loss": 0.0005,
"step": 1100
},
{
"epoch": 50.0,
"grad_norm": 0.00803290493786335,
"learning_rate": 1.497222222222222e-05,
"loss": 0.0005,
"step": 1150
},
{
"epoch": 52.17391304347826,
"grad_norm": 0.005789933726191521,
"learning_rate": 1.4777777777777777e-05,
"loss": 0.0005,
"step": 1200
},
{
"epoch": 54.34782608695652,
"grad_norm": 0.005295329727232456,
"learning_rate": 1.4583333333333333e-05,
"loss": 0.0004,
"step": 1250
},
{
"epoch": 56.52173913043478,
"grad_norm": 0.004821299575269222,
"learning_rate": 1.4388888888888886e-05,
"loss": 0.0004,
"step": 1300
},
{
"epoch": 58.69565217391305,
"grad_norm": 0.004488619975745678,
"learning_rate": 1.4194444444444443e-05,
"loss": 0.0004,
"step": 1350
},
{
"epoch": 60.869565217391305,
"grad_norm": 0.0040580639615654945,
"learning_rate": 1.4e-05,
"loss": 0.0004,
"step": 1400
},
{
"epoch": 63.04347826086956,
"grad_norm": 0.004379584453999996,
"learning_rate": 1.3805555555555553e-05,
"loss": 0.0003,
"step": 1450
},
{
"epoch": 65.21739130434783,
"grad_norm": 0.003948231227695942,
"learning_rate": 1.361111111111111e-05,
"loss": 0.0003,
"step": 1500
},
{
"epoch": 67.3913043478261,
"grad_norm": 0.0037093530409038067,
"learning_rate": 1.3416666666666666e-05,
"loss": 0.0003,
"step": 1550
},
{
"epoch": 69.56521739130434,
"grad_norm": 0.0037487975787371397,
"learning_rate": 1.3222222222222221e-05,
"loss": 0.0003,
"step": 1600
},
{
"epoch": 71.73913043478261,
"grad_norm": 0.003608859609812498,
"learning_rate": 1.3027777777777776e-05,
"loss": 0.0003,
"step": 1650
},
{
"epoch": 73.91304347826087,
"grad_norm": 0.0034225275740027428,
"learning_rate": 1.2833333333333331e-05,
"loss": 0.0003,
"step": 1700
},
{
"epoch": 76.08695652173913,
"grad_norm": 0.003319317242130637,
"learning_rate": 1.2638888888888888e-05,
"loss": 0.0003,
"step": 1750
},
{
"epoch": 78.26086956521739,
"grad_norm": 0.003253102768212557,
"learning_rate": 1.2444444444444445e-05,
"loss": 0.0002,
"step": 1800
},
{
"epoch": 80.43478260869566,
"grad_norm": 0.003029271960258484,
"learning_rate": 1.2249999999999998e-05,
"loss": 0.0002,
"step": 1850
},
{
"epoch": 82.6086956521739,
"grad_norm": 0.002931032096967101,
"learning_rate": 1.2055555555555555e-05,
"loss": 0.0002,
"step": 1900
},
{
"epoch": 84.78260869565217,
"grad_norm": 0.002701626857742667,
"learning_rate": 1.1861111111111111e-05,
"loss": 0.0002,
"step": 1950
},
{
"epoch": 86.95652173913044,
"grad_norm": 0.002894147066399455,
"learning_rate": 1.1666666666666665e-05,
"loss": 0.0002,
"step": 2000
},
{
"epoch": 86.95652173913044,
"eval_loss": 0.5610591173171997,
"eval_runtime": 40.2761,
"eval_samples_per_second": 7.126,
"eval_steps_per_second": 0.223,
"eval_wer": 17.456546204826807,
"step": 2000
},
{
"epoch": 89.1304347826087,
"grad_norm": 0.0025357184931635857,
"learning_rate": 1.1472222222222221e-05,
"loss": 0.0002,
"step": 2050
},
{
"epoch": 91.30434782608695,
"grad_norm": 0.0023813284933567047,
"learning_rate": 1.1277777777777778e-05,
"loss": 0.0002,
"step": 2100
},
{
"epoch": 93.47826086956522,
"grad_norm": 0.002499406924471259,
"learning_rate": 1.1083333333333331e-05,
"loss": 0.0002,
"step": 2150
},
{
"epoch": 95.65217391304348,
"grad_norm": 0.002111413050442934,
"learning_rate": 1.0888888888888888e-05,
"loss": 0.0002,
"step": 2200
},
{
"epoch": 97.82608695652173,
"grad_norm": 0.0020701128523796797,
"learning_rate": 1.0694444444444444e-05,
"loss": 0.0002,
"step": 2250
},
{
"epoch": 100.0,
"grad_norm": 0.0028157017659395933,
"learning_rate": 1.05e-05,
"loss": 0.0002,
"step": 2300
},
{
"epoch": 102.17391304347827,
"grad_norm": 0.0020910503808408976,
"learning_rate": 1.0305555555555554e-05,
"loss": 0.0002,
"step": 2350
},
{
"epoch": 104.34782608695652,
"grad_norm": 0.0019400820601731539,
"learning_rate": 1.011111111111111e-05,
"loss": 0.0002,
"step": 2400
},
{
"epoch": 106.52173913043478,
"grad_norm": 0.0016938770422711968,
"learning_rate": 9.916666666666666e-06,
"loss": 0.0002,
"step": 2450
},
{
"epoch": 108.69565217391305,
"grad_norm": 0.0018923500319942832,
"learning_rate": 9.722222222222221e-06,
"loss": 0.0002,
"step": 2500
},
{
"epoch": 110.8695652173913,
"grad_norm": 0.0017991146305575967,
"learning_rate": 9.527777777777776e-06,
"loss": 0.0001,
"step": 2550
},
{
"epoch": 113.04347826086956,
"grad_norm": 0.0018069138750433922,
"learning_rate": 9.333333333333333e-06,
"loss": 0.0001,
"step": 2600
},
{
"epoch": 115.21739130434783,
"grad_norm": 0.0017327632522210479,
"learning_rate": 9.13888888888889e-06,
"loss": 0.0001,
"step": 2650
},
{
"epoch": 117.3913043478261,
"grad_norm": 0.0015717543428763747,
"learning_rate": 8.944444444444443e-06,
"loss": 0.0001,
"step": 2700
},
{
"epoch": 119.56521739130434,
"grad_norm": 0.001588036073371768,
"learning_rate": 8.75e-06,
"loss": 0.0001,
"step": 2750
},
{
"epoch": 121.73913043478261,
"grad_norm": 0.0015617014141753316,
"learning_rate": 8.555555555555554e-06,
"loss": 0.0001,
"step": 2800
},
{
"epoch": 123.91304347826087,
"grad_norm": 0.0015488613862544298,
"learning_rate": 8.361111111111111e-06,
"loss": 0.0001,
"step": 2850
},
{
"epoch": 126.08695652173913,
"grad_norm": 0.0016123673412948847,
"learning_rate": 8.166666666666666e-06,
"loss": 0.0001,
"step": 2900
},
{
"epoch": 128.2608695652174,
"grad_norm": 0.0014842275995761156,
"learning_rate": 7.97222222222222e-06,
"loss": 0.0001,
"step": 2950
},
{
"epoch": 130.43478260869566,
"grad_norm": 0.001505482941865921,
"learning_rate": 7.777777777777777e-06,
"loss": 0.0001,
"step": 3000
},
{
"epoch": 130.43478260869566,
"eval_loss": 0.5873442888259888,
"eval_runtime": 40.5248,
"eval_samples_per_second": 7.082,
"eval_steps_per_second": 0.222,
"eval_wer": 17.39402275853445,
"step": 3000
}
],
"logging_steps": 50,
"max_steps": 5000,
"num_input_tokens_seen": 0,
"num_train_epochs": 218,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 5.43204299022336e+19,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}