reach-vb's picture
reach-vb HF staff
Upload folder using huggingface_hub
6a08c5a verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.9987505206164098,
"eval_steps": 240,
"global_step": 4800,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.00041649312786339027,
"grad_norm": 7.0,
"learning_rate": 2e-06,
"loss": 0.7314,
"step": 1
},
{
"epoch": 0.04164931278633902,
"grad_norm": 0.0712890625,
"learning_rate": 0.0002,
"loss": 0.377,
"step": 100
},
{
"epoch": 0.08329862557267805,
"grad_norm": 0.12109375,
"learning_rate": 0.0004,
"loss": 0.2401,
"step": 200
},
{
"epoch": 0.09995835068721366,
"eval_peoplespeech-clean-transcription_loss": 3.7064812183380127,
"eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062,
"eval_peoplespeech-clean-transcription_runtime": 14.4755,
"eval_peoplespeech-clean-transcription_samples_per_second": 4.421,
"eval_peoplespeech-clean-transcription_steps_per_second": 0.069,
"step": 240
},
{
"epoch": 0.12494793835901707,
"grad_norm": 0.10791015625,
"learning_rate": 0.0006,
"loss": 0.209,
"step": 300
},
{
"epoch": 0.1665972511453561,
"grad_norm": 0.08740234375,
"learning_rate": 0.0008,
"loss": 0.1586,
"step": 400
},
{
"epoch": 0.19991670137442732,
"eval_peoplespeech-clean-transcription_loss": 2.0534801483154297,
"eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062,
"eval_peoplespeech-clean-transcription_runtime": 13.9793,
"eval_peoplespeech-clean-transcription_samples_per_second": 4.578,
"eval_peoplespeech-clean-transcription_steps_per_second": 0.072,
"step": 480
},
{
"epoch": 0.20824656393169513,
"grad_norm": 0.059814453125,
"learning_rate": 0.001,
"loss": 0.1178,
"step": 500
},
{
"epoch": 0.24989587671803415,
"grad_norm": 0.05126953125,
"learning_rate": 0.0012,
"loss": 0.1031,
"step": 600
},
{
"epoch": 0.2915451895043732,
"grad_norm": 0.04443359375,
"learning_rate": 0.0014,
"loss": 0.0942,
"step": 700
},
{
"epoch": 0.299875052061641,
"eval_peoplespeech-clean-transcription_loss": 1.931348443031311,
"eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062,
"eval_peoplespeech-clean-transcription_runtime": 14.1549,
"eval_peoplespeech-clean-transcription_samples_per_second": 4.521,
"eval_peoplespeech-clean-transcription_steps_per_second": 0.071,
"step": 720
},
{
"epoch": 0.3331945022907122,
"grad_norm": 0.040283203125,
"learning_rate": 0.0016,
"loss": 0.0885,
"step": 800
},
{
"epoch": 0.3748438150770512,
"grad_norm": 0.035888671875,
"learning_rate": 0.0018000000000000002,
"loss": 0.0853,
"step": 900
},
{
"epoch": 0.39983340274885465,
"eval_peoplespeech-clean-transcription_loss": 1.8942928314208984,
"eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062,
"eval_peoplespeech-clean-transcription_runtime": 13.7985,
"eval_peoplespeech-clean-transcription_samples_per_second": 4.638,
"eval_peoplespeech-clean-transcription_steps_per_second": 0.072,
"step": 960
},
{
"epoch": 0.41649312786339027,
"grad_norm": 0.033203125,
"learning_rate": 0.002,
"loss": 0.0817,
"step": 1000
},
{
"epoch": 0.45814244064972925,
"grad_norm": 0.03173828125,
"learning_rate": 0.001996926043706003,
"loss": 0.0798,
"step": 1100
},
{
"epoch": 0.4997917534360683,
"grad_norm": 0.029296875,
"learning_rate": 0.0019877251730624503,
"loss": 0.0777,
"step": 1200
},
{
"epoch": 0.4997917534360683,
"eval_peoplespeech-clean-transcription_loss": 1.863105297088623,
"eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062,
"eval_peoplespeech-clean-transcription_runtime": 13.1079,
"eval_peoplespeech-clean-transcription_samples_per_second": 4.883,
"eval_peoplespeech-clean-transcription_steps_per_second": 0.076,
"step": 1200
},
{
"epoch": 0.5414410662224073,
"grad_norm": 0.028076171875,
"learning_rate": 0.0019724602393453973,
"loss": 0.0761,
"step": 1300
},
{
"epoch": 0.5830903790087464,
"grad_norm": 0.0277099609375,
"learning_rate": 0.001951235517530571,
"loss": 0.0744,
"step": 1400
},
{
"epoch": 0.599750104123282,
"eval_peoplespeech-clean-transcription_loss": 1.8575688600540161,
"eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062,
"eval_peoplespeech-clean-transcription_runtime": 14.2046,
"eval_peoplespeech-clean-transcription_samples_per_second": 4.506,
"eval_peoplespeech-clean-transcription_steps_per_second": 0.07,
"step": 1440
},
{
"epoch": 0.6247396917950854,
"grad_norm": 0.02490234375,
"learning_rate": 0.0019241959939895518,
"loss": 0.073,
"step": 1500
},
{
"epoch": 0.6663890045814244,
"grad_norm": 0.0263671875,
"learning_rate": 0.0018915263760858401,
"loss": 0.071,
"step": 1600
},
{
"epoch": 0.6997084548104956,
"eval_peoplespeech-clean-transcription_loss": 1.83821439743042,
"eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062,
"eval_peoplespeech-clean-transcription_runtime": 13.7329,
"eval_peoplespeech-clean-transcription_samples_per_second": 4.66,
"eval_peoplespeech-clean-transcription_steps_per_second": 0.073,
"step": 1680
},
{
"epoch": 0.7080383173677635,
"grad_norm": 0.0252685546875,
"learning_rate": 0.0018534498304362756,
"loss": 0.0704,
"step": 1700
},
{
"epoch": 0.7496876301541024,
"grad_norm": 0.027587890625,
"learning_rate": 0.0018102264584567542,
"loss": 0.0693,
"step": 1800
},
{
"epoch": 0.7913369429404414,
"grad_norm": 0.0264892578125,
"learning_rate": 0.0017621515196058187,
"loss": 0.0679,
"step": 1900
},
{
"epoch": 0.7996668054977093,
"eval_peoplespeech-clean-transcription_loss": 1.835157036781311,
"eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062,
"eval_peoplespeech-clean-transcription_runtime": 13.3808,
"eval_peoplespeech-clean-transcription_samples_per_second": 4.783,
"eval_peoplespeech-clean-transcription_steps_per_second": 0.075,
"step": 1920
},
{
"epoch": 0.8329862557267805,
"grad_norm": 0.02294921875,
"learning_rate": 0.001709553414463167,
"loss": 0.0678,
"step": 2000
},
{
"epoch": 0.8746355685131195,
"grad_norm": 0.0220947265625,
"learning_rate": 0.0016527914414207012,
"loss": 0.0669,
"step": 2100
},
{
"epoch": 0.899625156184923,
"eval_peoplespeech-clean-transcription_loss": 1.822905421257019,
"eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062,
"eval_peoplespeech-clean-transcription_runtime": 13.1096,
"eval_peoplespeech-clean-transcription_samples_per_second": 4.882,
"eval_peoplespeech-clean-transcription_steps_per_second": 0.076,
"step": 2160
},
{
"epoch": 0.9162848812994585,
"grad_norm": 0.0244140625,
"learning_rate": 0.0015922533423101844,
"loss": 0.0659,
"step": 2200
},
{
"epoch": 0.9579341940857976,
"grad_norm": 0.0255126953125,
"learning_rate": 0.0015283526537333662,
"loss": 0.0658,
"step": 2300
},
{
"epoch": 0.9995835068721366,
"grad_norm": 0.0225830078125,
"learning_rate": 0.0014615258821876727,
"loss": 0.0653,
"step": 2400
},
{
"epoch": 0.9995835068721366,
"eval_peoplespeech-clean-transcription_loss": 1.8253135681152344,
"eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062,
"eval_peoplespeech-clean-transcription_runtime": 13.4347,
"eval_peoplespeech-clean-transcription_samples_per_second": 4.764,
"eval_peoplespeech-clean-transcription_steps_per_second": 0.074,
"step": 2400
},
{
"epoch": 1.0408163265306123,
"grad_norm": 0.0264892578125,
"learning_rate": 0.0013922295222842153,
"loss": 0.0606,
"step": 2500
},
{
"epoch": 1.0824656393169512,
"grad_norm": 0.0205078125,
"learning_rate": 0.0013209369384267193,
"loss": 0.0639,
"step": 2600
},
{
"epoch": 1.099125364431487,
"eval_peoplespeech-clean-transcription_loss": 1.7945704460144043,
"eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062,
"eval_peoplespeech-clean-transcription_runtime": 14.4627,
"eval_peoplespeech-clean-transcription_samples_per_second": 4.425,
"eval_peoplespeech-clean-transcription_steps_per_second": 0.069,
"step": 2640
},
{
"epoch": 1.1241149521032903,
"grad_norm": 0.0218505859375,
"learning_rate": 0.0012481351312526605,
"loss": 0.0637,
"step": 2700
},
{
"epoch": 1.1657642648896294,
"grad_norm": 0.0225830078125,
"learning_rate": 0.0011743214109250992,
"loss": 0.0636,
"step": 2800
},
{
"epoch": 1.1990837151187006,
"eval_peoplespeech-clean-transcription_loss": 1.799329400062561,
"eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062,
"eval_peoplespeech-clean-transcription_runtime": 13.4877,
"eval_peoplespeech-clean-transcription_samples_per_second": 4.745,
"eval_peoplespeech-clean-transcription_steps_per_second": 0.074,
"step": 2880
},
{
"epoch": 1.2074135776759682,
"grad_norm": 0.0216064453125,
"learning_rate": 0.0011,
"loss": 0.063,
"step": 2900
},
{
"epoch": 1.2490628904623073,
"grad_norm": 0.0230712890625,
"learning_rate": 0.001025678589074901,
"loss": 0.0625,
"step": 3000
},
{
"epoch": 1.2907122032486464,
"grad_norm": 0.025146484375,
"learning_rate": 0.0009518648687473394,
"loss": 0.0615,
"step": 3100
},
{
"epoch": 1.299042065805914,
"eval_peoplespeech-clean-transcription_loss": 1.777367353439331,
"eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062,
"eval_peoplespeech-clean-transcription_runtime": 13.9312,
"eval_peoplespeech-clean-transcription_samples_per_second": 4.594,
"eval_peoplespeech-clean-transcription_steps_per_second": 0.072,
"step": 3120
},
{
"epoch": 1.3323615160349855,
"grad_norm": 0.0233154296875,
"learning_rate": 0.0008790630615732809,
"loss": 0.0608,
"step": 3200
},
{
"epoch": 1.3740108288213244,
"grad_norm": 0.0235595703125,
"learning_rate": 0.0008077704777157851,
"loss": 0.0608,
"step": 3300
},
{
"epoch": 1.3990004164931278,
"eval_peoplespeech-clean-transcription_loss": 1.7695162296295166,
"eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062,
"eval_peoplespeech-clean-transcription_runtime": 13.6148,
"eval_peoplespeech-clean-transcription_samples_per_second": 4.701,
"eval_peoplespeech-clean-transcription_steps_per_second": 0.073,
"step": 3360
},
{
"epoch": 1.4156601416076635,
"grad_norm": 0.02099609375,
"learning_rate": 0.0007384741178123277,
"loss": 0.0596,
"step": 3400
},
{
"epoch": 1.4573094543940024,
"grad_norm": 0.021240234375,
"learning_rate": 0.0006716473462666338,
"loss": 0.0595,
"step": 3500
},
{
"epoch": 1.4989587671803415,
"grad_norm": 0.0216064453125,
"learning_rate": 0.0006077466576898161,
"loss": 0.0591,
"step": 3600
},
{
"epoch": 1.4989587671803415,
"eval_peoplespeech-clean-transcription_loss": 1.7758369445800781,
"eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062,
"eval_peoplespeech-clean-transcription_runtime": 13.8514,
"eval_peoplespeech-clean-transcription_samples_per_second": 4.62,
"eval_peoplespeech-clean-transcription_steps_per_second": 0.072,
"step": 3600
},
{
"epoch": 1.5406080799666806,
"grad_norm": 0.0220947265625,
"learning_rate": 0.000547208558579299,
"loss": 0.0589,
"step": 3700
},
{
"epoch": 1.5822573927530197,
"grad_norm": 0.0208740234375,
"learning_rate": 0.0004904465855368333,
"loss": 0.0582,
"step": 3800
},
{
"epoch": 1.5989171178675552,
"eval_peoplespeech-clean-transcription_loss": 1.7663408517837524,
"eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062,
"eval_peoplespeech-clean-transcription_runtime": 14.1527,
"eval_peoplespeech-clean-transcription_samples_per_second": 4.522,
"eval_peoplespeech-clean-transcription_steps_per_second": 0.071,
"step": 3840
},
{
"epoch": 1.6239067055393586,
"grad_norm": 0.0198974609375,
"learning_rate": 0.0004378484803941816,
"loss": 0.0579,
"step": 3900
},
{
"epoch": 1.6655560183256977,
"grad_norm": 0.019775390625,
"learning_rate": 0.0003897735415432459,
"loss": 0.0567,
"step": 4000
},
{
"epoch": 1.698875468554769,
"eval_peoplespeech-clean-transcription_loss": 1.767942190170288,
"eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062,
"eval_peoplespeech-clean-transcription_runtime": 14.5529,
"eval_peoplespeech-clean-transcription_samples_per_second": 4.398,
"eval_peoplespeech-clean-transcription_steps_per_second": 0.069,
"step": 4080
},
{
"epoch": 1.7072053311120365,
"grad_norm": 0.02001953125,
"learning_rate": 0.00034655016956372433,
"loss": 0.0569,
"step": 4100
},
{
"epoch": 1.7488546438983756,
"grad_norm": 0.0203857421875,
"learning_rate": 0.00030847362391415995,
"loss": 0.0564,
"step": 4200
},
{
"epoch": 1.7905039566847147,
"grad_norm": 0.019287109375,
"learning_rate": 0.00027580400601044826,
"loss": 0.0557,
"step": 4300
},
{
"epoch": 1.7988338192419824,
"eval_peoplespeech-clean-transcription_loss": 1.759714961051941,
"eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062,
"eval_peoplespeech-clean-transcription_runtime": 13.9077,
"eval_peoplespeech-clean-transcription_samples_per_second": 4.602,
"eval_peoplespeech-clean-transcription_steps_per_second": 0.072,
"step": 4320
},
{
"epoch": 1.8321532694710538,
"grad_norm": 0.023193359375,
"learning_rate": 0.00024876448246942883,
"loss": 0.0559,
"step": 4400
},
{
"epoch": 1.8738025822573927,
"grad_norm": 0.0206298828125,
"learning_rate": 0.0002275397606546027,
"loss": 0.0557,
"step": 4500
},
{
"epoch": 1.8987921699291963,
"eval_peoplespeech-clean-transcription_loss": 1.7603609561920166,
"eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062,
"eval_peoplespeech-clean-transcription_runtime": 13.6582,
"eval_peoplespeech-clean-transcription_samples_per_second": 4.686,
"eval_peoplespeech-clean-transcription_steps_per_second": 0.073,
"step": 4560
},
{
"epoch": 1.9154518950437318,
"grad_norm": 0.020263671875,
"learning_rate": 0.00021227482693754994,
"loss": 0.0551,
"step": 4600
},
{
"epoch": 1.9571012078300707,
"grad_norm": 0.02099609375,
"learning_rate": 0.00020307395629399715,
"loss": 0.055,
"step": 4700
},
{
"epoch": 1.9987505206164098,
"grad_norm": 0.01806640625,
"learning_rate": 0.0002,
"loss": 0.054,
"step": 4800
},
{
"epoch": 1.9987505206164098,
"eval_peoplespeech-clean-transcription_loss": 1.7643760442733765,
"eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062,
"eval_peoplespeech-clean-transcription_runtime": 14.0636,
"eval_peoplespeech-clean-transcription_samples_per_second": 4.551,
"eval_peoplespeech-clean-transcription_steps_per_second": 0.071,
"step": 4800
}
],
"logging_steps": 100,
"max_steps": 4800,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 1200,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 7.181752463303639e+18,
"train_batch_size": 576,
"trial_name": null,
"trial_params": null
}