{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.4989587671803415, "eval_steps": 240, "global_step": 3600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00041649312786339027, "grad_norm": 7.0, "learning_rate": 2e-06, "loss": 0.7314, "step": 1 }, { "epoch": 0.04164931278633902, "grad_norm": 0.0712890625, "learning_rate": 0.0002, "loss": 0.377, "step": 100 }, { "epoch": 0.08329862557267805, "grad_norm": 0.12109375, "learning_rate": 0.0004, "loss": 0.2401, "step": 200 }, { "epoch": 0.09995835068721366, "eval_peoplespeech-clean-transcription_loss": 3.7064812183380127, "eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062, "eval_peoplespeech-clean-transcription_runtime": 14.4755, "eval_peoplespeech-clean-transcription_samples_per_second": 4.421, "eval_peoplespeech-clean-transcription_steps_per_second": 0.069, "step": 240 }, { "epoch": 0.12494793835901707, "grad_norm": 0.10791015625, "learning_rate": 0.0006, "loss": 0.209, "step": 300 }, { "epoch": 0.1665972511453561, "grad_norm": 0.08740234375, "learning_rate": 0.0008, "loss": 0.1586, "step": 400 }, { "epoch": 0.19991670137442732, "eval_peoplespeech-clean-transcription_loss": 2.0534801483154297, "eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062, "eval_peoplespeech-clean-transcription_runtime": 13.9793, "eval_peoplespeech-clean-transcription_samples_per_second": 4.578, "eval_peoplespeech-clean-transcription_steps_per_second": 0.072, "step": 480 }, { "epoch": 0.20824656393169513, "grad_norm": 0.059814453125, "learning_rate": 0.001, "loss": 0.1178, "step": 500 }, { "epoch": 0.24989587671803415, "grad_norm": 0.05126953125, "learning_rate": 0.0012, "loss": 0.1031, "step": 600 }, { "epoch": 0.2915451895043732, "grad_norm": 0.04443359375, "learning_rate": 0.0014, "loss": 0.0942, "step": 700 }, { "epoch": 0.299875052061641, "eval_peoplespeech-clean-transcription_loss": 1.931348443031311, "eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062, "eval_peoplespeech-clean-transcription_runtime": 14.1549, "eval_peoplespeech-clean-transcription_samples_per_second": 4.521, "eval_peoplespeech-clean-transcription_steps_per_second": 0.071, "step": 720 }, { "epoch": 0.3331945022907122, "grad_norm": 0.040283203125, "learning_rate": 0.0016, "loss": 0.0885, "step": 800 }, { "epoch": 0.3748438150770512, "grad_norm": 0.035888671875, "learning_rate": 0.0018000000000000002, "loss": 0.0853, "step": 900 }, { "epoch": 0.39983340274885465, "eval_peoplespeech-clean-transcription_loss": 1.8942928314208984, "eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062, "eval_peoplespeech-clean-transcription_runtime": 13.7985, "eval_peoplespeech-clean-transcription_samples_per_second": 4.638, "eval_peoplespeech-clean-transcription_steps_per_second": 0.072, "step": 960 }, { "epoch": 0.41649312786339027, "grad_norm": 0.033203125, "learning_rate": 0.002, "loss": 0.0817, "step": 1000 }, { "epoch": 0.45814244064972925, "grad_norm": 0.03173828125, "learning_rate": 0.001996926043706003, "loss": 0.0798, "step": 1100 }, { "epoch": 0.4997917534360683, "grad_norm": 0.029296875, "learning_rate": 0.0019877251730624503, "loss": 0.0777, "step": 1200 }, { "epoch": 0.4997917534360683, "eval_peoplespeech-clean-transcription_loss": 1.863105297088623, "eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062, "eval_peoplespeech-clean-transcription_runtime": 13.1079, "eval_peoplespeech-clean-transcription_samples_per_second": 4.883, "eval_peoplespeech-clean-transcription_steps_per_second": 0.076, "step": 1200 }, { "epoch": 0.5414410662224073, "grad_norm": 0.028076171875, "learning_rate": 0.0019724602393453973, "loss": 0.0761, "step": 1300 }, { "epoch": 0.5830903790087464, "grad_norm": 0.0277099609375, "learning_rate": 0.001951235517530571, "loss": 0.0744, "step": 1400 }, { "epoch": 0.599750104123282, "eval_peoplespeech-clean-transcription_loss": 1.8575688600540161, "eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062, "eval_peoplespeech-clean-transcription_runtime": 14.2046, "eval_peoplespeech-clean-transcription_samples_per_second": 4.506, "eval_peoplespeech-clean-transcription_steps_per_second": 0.07, "step": 1440 }, { "epoch": 0.6247396917950854, "grad_norm": 0.02490234375, "learning_rate": 0.0019241959939895518, "loss": 0.073, "step": 1500 }, { "epoch": 0.6663890045814244, "grad_norm": 0.0263671875, "learning_rate": 0.0018915263760858401, "loss": 0.071, "step": 1600 }, { "epoch": 0.6997084548104956, "eval_peoplespeech-clean-transcription_loss": 1.83821439743042, "eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062, "eval_peoplespeech-clean-transcription_runtime": 13.7329, "eval_peoplespeech-clean-transcription_samples_per_second": 4.66, "eval_peoplespeech-clean-transcription_steps_per_second": 0.073, "step": 1680 }, { "epoch": 0.7080383173677635, "grad_norm": 0.0252685546875, "learning_rate": 0.0018534498304362756, "loss": 0.0704, "step": 1700 }, { "epoch": 0.7496876301541024, "grad_norm": 0.027587890625, "learning_rate": 0.0018102264584567542, "loss": 0.0693, "step": 1800 }, { "epoch": 0.7913369429404414, "grad_norm": 0.0264892578125, "learning_rate": 0.0017621515196058187, "loss": 0.0679, "step": 1900 }, { "epoch": 0.7996668054977093, "eval_peoplespeech-clean-transcription_loss": 1.835157036781311, "eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062, "eval_peoplespeech-clean-transcription_runtime": 13.3808, "eval_peoplespeech-clean-transcription_samples_per_second": 4.783, "eval_peoplespeech-clean-transcription_steps_per_second": 0.075, "step": 1920 }, { "epoch": 0.8329862557267805, "grad_norm": 0.02294921875, "learning_rate": 0.001709553414463167, "loss": 0.0678, "step": 2000 }, { "epoch": 0.8746355685131195, "grad_norm": 0.0220947265625, "learning_rate": 0.0016527914414207012, "loss": 0.0669, "step": 2100 }, { "epoch": 0.899625156184923, "eval_peoplespeech-clean-transcription_loss": 1.822905421257019, "eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062, "eval_peoplespeech-clean-transcription_runtime": 13.1096, "eval_peoplespeech-clean-transcription_samples_per_second": 4.882, "eval_peoplespeech-clean-transcription_steps_per_second": 0.076, "step": 2160 }, { "epoch": 0.9162848812994585, "grad_norm": 0.0244140625, "learning_rate": 0.0015922533423101844, "loss": 0.0659, "step": 2200 }, { "epoch": 0.9579341940857976, "grad_norm": 0.0255126953125, "learning_rate": 0.0015283526537333662, "loss": 0.0658, "step": 2300 }, { "epoch": 0.9995835068721366, "grad_norm": 0.0225830078125, "learning_rate": 0.0014615258821876727, "loss": 0.0653, "step": 2400 }, { "epoch": 0.9995835068721366, "eval_peoplespeech-clean-transcription_loss": 1.8253135681152344, "eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062, "eval_peoplespeech-clean-transcription_runtime": 13.4347, "eval_peoplespeech-clean-transcription_samples_per_second": 4.764, "eval_peoplespeech-clean-transcription_steps_per_second": 0.074, "step": 2400 }, { "epoch": 1.0408163265306123, "grad_norm": 0.0264892578125, "learning_rate": 0.0013922295222842153, "loss": 0.0606, "step": 2500 }, { "epoch": 1.0824656393169512, "grad_norm": 0.0205078125, "learning_rate": 0.0013209369384267193, "loss": 0.0639, "step": 2600 }, { "epoch": 1.099125364431487, "eval_peoplespeech-clean-transcription_loss": 1.7945704460144043, "eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062, "eval_peoplespeech-clean-transcription_runtime": 14.4627, "eval_peoplespeech-clean-transcription_samples_per_second": 4.425, "eval_peoplespeech-clean-transcription_steps_per_second": 0.069, "step": 2640 }, { "epoch": 1.1241149521032903, "grad_norm": 0.0218505859375, "learning_rate": 0.0012481351312526605, "loss": 0.0637, "step": 2700 }, { "epoch": 1.1657642648896294, "grad_norm": 0.0225830078125, "learning_rate": 0.0011743214109250992, "loss": 0.0636, "step": 2800 }, { "epoch": 1.1990837151187006, "eval_peoplespeech-clean-transcription_loss": 1.799329400062561, "eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062, "eval_peoplespeech-clean-transcription_runtime": 13.4877, "eval_peoplespeech-clean-transcription_samples_per_second": 4.745, "eval_peoplespeech-clean-transcription_steps_per_second": 0.074, "step": 2880 }, { "epoch": 1.2074135776759682, "grad_norm": 0.0216064453125, "learning_rate": 0.0011, "loss": 0.063, "step": 2900 }, { "epoch": 1.2490628904623073, "grad_norm": 0.0230712890625, "learning_rate": 0.001025678589074901, "loss": 0.0625, "step": 3000 }, { "epoch": 1.2907122032486464, "grad_norm": 0.025146484375, "learning_rate": 0.0009518648687473394, "loss": 0.0615, "step": 3100 }, { "epoch": 1.299042065805914, "eval_peoplespeech-clean-transcription_loss": 1.777367353439331, "eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062, "eval_peoplespeech-clean-transcription_runtime": 13.9312, "eval_peoplespeech-clean-transcription_samples_per_second": 4.594, "eval_peoplespeech-clean-transcription_steps_per_second": 0.072, "step": 3120 }, { "epoch": 1.3323615160349855, "grad_norm": 0.0233154296875, "learning_rate": 0.0008790630615732809, "loss": 0.0608, "step": 3200 }, { "epoch": 1.3740108288213244, "grad_norm": 0.0235595703125, "learning_rate": 0.0008077704777157851, "loss": 0.0608, "step": 3300 }, { "epoch": 1.3990004164931278, "eval_peoplespeech-clean-transcription_loss": 1.7695162296295166, "eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062, "eval_peoplespeech-clean-transcription_runtime": 13.6148, "eval_peoplespeech-clean-transcription_samples_per_second": 4.701, "eval_peoplespeech-clean-transcription_steps_per_second": 0.073, "step": 3360 }, { "epoch": 1.4156601416076635, "grad_norm": 0.02099609375, "learning_rate": 0.0007384741178123277, "loss": 0.0596, "step": 3400 }, { "epoch": 1.4573094543940024, "grad_norm": 0.021240234375, "learning_rate": 0.0006716473462666338, "loss": 0.0595, "step": 3500 }, { "epoch": 1.4989587671803415, "grad_norm": 0.0216064453125, "learning_rate": 0.0006077466576898161, "loss": 0.0591, "step": 3600 }, { "epoch": 1.4989587671803415, "eval_peoplespeech-clean-transcription_loss": 1.7758369445800781, "eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062, "eval_peoplespeech-clean-transcription_runtime": 13.8514, "eval_peoplespeech-clean-transcription_samples_per_second": 4.62, "eval_peoplespeech-clean-transcription_steps_per_second": 0.072, "step": 3600 } ], "logging_steps": 100, "max_steps": 4800, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 1200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 5.3862866198064e+18, "train_batch_size": 576, "trial_name": null, "trial_params": null }