|
{
  "best_global_step": 1500,
  "best_metric": 0.10838954150676727,
  "best_model_checkpoint": "./whisper-small-splitted/checkpoint-1500",
  "epoch": 12.656151419558359,
  "eval_steps": 500,
  "global_step": 3000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.10515247108307045,
      "grad_norm": 37.98896408081055,
      "learning_rate": 4.800000000000001e-07,
      "loss": 4.0211,
      "step": 25
    },
    {
      "epoch": 0.2103049421661409,
      "grad_norm": 28.073650360107422,
      "learning_rate": 9.800000000000001e-07,
      "loss": 3.1838,
      "step": 50
    },
    {
      "epoch": 0.31545741324921134,
      "grad_norm": 12.10400676727295,
      "learning_rate": 1.48e-06,
      "loss": 2.0459,
      "step": 75
    },
    {
      "epoch": 0.4206098843322818,
      "grad_norm": 8.05373477935791,
      "learning_rate": 1.98e-06,
      "loss": 1.3254,
      "step": 100
    },
    {
      "epoch": 0.5257623554153522,
      "grad_norm": 6.276533126831055,
      "learning_rate": 2.4800000000000004e-06,
      "loss": 1.0124,
      "step": 125
    },
    {
      "epoch": 0.6309148264984227,
      "grad_norm": 5.450071334838867,
      "learning_rate": 2.9800000000000003e-06,
      "loss": 0.7972,
      "step": 150
    },
    {
      "epoch": 0.7360672975814931,
      "grad_norm": 6.463825225830078,
      "learning_rate": 3.48e-06,
      "loss": 0.6987,
      "step": 175
    },
    {
      "epoch": 0.8412197686645636,
      "grad_norm": 6.113511085510254,
      "learning_rate": 3.980000000000001e-06,
      "loss": 0.6371,
      "step": 200
    },
    {
      "epoch": 0.9463722397476341,
      "grad_norm": 5.203503131866455,
      "learning_rate": 4.48e-06,
      "loss": 0.5403,
      "step": 225
    },
    {
      "epoch": 1.0546792849631967,
      "grad_norm": 3.6247618198394775,
      "learning_rate": 4.980000000000001e-06,
      "loss": 0.4597,
      "step": 250
    },
    {
      "epoch": 1.159831756046267,
      "grad_norm": 4.7159552574157715,
      "learning_rate": 5.480000000000001e-06,
      "loss": 0.4074,
      "step": 275
    },
    {
      "epoch": 1.2649842271293377,
      "grad_norm": 5.012338638305664,
      "learning_rate": 5.98e-06,
      "loss": 0.3846,
      "step": 300
    },
    {
      "epoch": 1.370136698212408,
      "grad_norm": 4.738580703735352,
      "learning_rate": 6.480000000000001e-06,
      "loss": 0.3306,
      "step": 325
    },
    {
      "epoch": 1.4752891692954784,
      "grad_norm": 4.1813578605651855,
      "learning_rate": 6.98e-06,
      "loss": 0.3145,
      "step": 350
    },
    {
      "epoch": 1.5804416403785488,
      "grad_norm": 5.1083855628967285,
      "learning_rate": 7.48e-06,
      "loss": 0.2901,
      "step": 375
    },
    {
      "epoch": 1.6855941114616193,
      "grad_norm": 3.654137372970581,
      "learning_rate": 7.980000000000002e-06,
      "loss": 0.2473,
      "step": 400
    },
    {
      "epoch": 1.7907465825446898,
      "grad_norm": 5.576925754547119,
      "learning_rate": 8.48e-06,
      "loss": 0.2408,
      "step": 425
    },
    {
      "epoch": 1.8958990536277602,
      "grad_norm": 5.050299167633057,
      "learning_rate": 8.98e-06,
      "loss": 0.222,
      "step": 450
    },
    {
      "epoch": 2.004206098843323,
      "grad_norm": 4.0772705078125,
      "learning_rate": 9.48e-06,
      "loss": 0.2311,
      "step": 475
    },
    {
      "epoch": 2.1093585699263935,
      "grad_norm": 3.189274311065674,
      "learning_rate": 9.980000000000001e-06,
      "loss": 0.1324,
      "step": 500
    },
    {
      "epoch": 2.1093585699263935,
      "eval_loss": 0.1909431368112564,
      "eval_runtime": 1339.9903,
      "eval_samples_per_second": 2.432,
      "eval_steps_per_second": 0.608,
      "eval_wer": 0.3306828586574141,
      "step": 500
    },
    {
      "epoch": 2.2145110410094637,
      "grad_norm": 3.6630983352661133,
      "learning_rate": 9.982507288629738e-06,
      "loss": 0.1425,
      "step": 525
    },
    {
      "epoch": 2.319663512092534,
      "grad_norm": 3.793405532836914,
      "learning_rate": 9.964285714285714e-06,
      "loss": 0.1394,
      "step": 550
    },
    {
      "epoch": 2.4248159831756047,
      "grad_norm": 3.442934036254883,
      "learning_rate": 9.94606413994169e-06,
      "loss": 0.1445,
      "step": 575
    },
    {
      "epoch": 2.5299684542586753,
      "grad_norm": 2.4245970249176025,
      "learning_rate": 9.927842565597668e-06,
      "loss": 0.1247,
      "step": 600
    },
    {
      "epoch": 2.6351209253417456,
      "grad_norm": 2.134147882461548,
      "learning_rate": 9.909620991253646e-06,
      "loss": 0.1246,
      "step": 625
    },
    {
      "epoch": 2.740273396424816,
      "grad_norm": 3.135070562362671,
      "learning_rate": 9.891399416909622e-06,
      "loss": 0.1233,
      "step": 650
    },
    {
      "epoch": 2.8454258675078865,
      "grad_norm": 2.723932981491089,
      "learning_rate": 9.873177842565598e-06,
      "loss": 0.119,
      "step": 675
    },
    {
      "epoch": 2.9505783385909568,
      "grad_norm": 3.9131784439086914,
      "learning_rate": 9.854956268221574e-06,
      "loss": 0.1334,
      "step": 700
    },
    {
      "epoch": 3.0588853838065195,
      "grad_norm": 4.044662952423096,
      "learning_rate": 9.836734693877552e-06,
      "loss": 0.0902,
      "step": 725
    },
    {
      "epoch": 3.1640378548895898,
      "grad_norm": 2.1829943656921387,
      "learning_rate": 9.818513119533528e-06,
      "loss": 0.0702,
      "step": 750
    },
    {
      "epoch": 3.2691903259726605,
      "grad_norm": 2.548558235168457,
      "learning_rate": 9.800291545189506e-06,
      "loss": 0.0712,
      "step": 775
    },
    {
      "epoch": 3.3743427970557307,
      "grad_norm": 1.3480279445648193,
      "learning_rate": 9.782069970845482e-06,
      "loss": 0.0654,
      "step": 800
    },
    {
      "epoch": 3.4794952681388014,
      "grad_norm": 2.091961622238159,
      "learning_rate": 9.763848396501458e-06,
      "loss": 0.0658,
      "step": 825
    },
    {
      "epoch": 3.5846477392218716,
      "grad_norm": 2.2356009483337402,
      "learning_rate": 9.745626822157436e-06,
      "loss": 0.0682,
      "step": 850
    },
    {
      "epoch": 3.6898002103049423,
      "grad_norm": 2.5908384323120117,
      "learning_rate": 9.727405247813412e-06,
      "loss": 0.056,
      "step": 875
    },
    {
      "epoch": 3.7949526813880126,
      "grad_norm": 2.618177890777588,
      "learning_rate": 9.70918367346939e-06,
      "loss": 0.0623,
      "step": 900
    },
    {
      "epoch": 3.900105152471083,
      "grad_norm": 2.1948764324188232,
      "learning_rate": 9.690962099125366e-06,
      "loss": 0.0752,
      "step": 925
    },
    {
      "epoch": 4.008412197686646,
      "grad_norm": 1.5540448427200317,
      "learning_rate": 9.672740524781342e-06,
      "loss": 0.0638,
      "step": 950
    },
    {
      "epoch": 4.113564668769716,
      "grad_norm": 1.905039668083191,
      "learning_rate": 9.654518950437318e-06,
      "loss": 0.0397,
      "step": 975
    },
    {
      "epoch": 4.218717139852787,
      "grad_norm": 2.011355400085449,
      "learning_rate": 9.636297376093296e-06,
      "loss": 0.0383,
      "step": 1000
    },
    {
      "epoch": 4.218717139852787,
      "eval_loss": 0.1153440847992897,
      "eval_runtime": 1137.0265,
      "eval_samples_per_second": 2.866,
      "eval_steps_per_second": 0.717,
      "eval_wer": 0.11305479254011855,
      "step": 1000
    },
    {
      "epoch": 4.323869610935857,
      "grad_norm": 1.983497142791748,
      "learning_rate": 9.618075801749272e-06,
      "loss": 0.0376,
      "step": 1025
    },
    {
      "epoch": 4.429022082018927,
      "grad_norm": 2.7597391605377197,
      "learning_rate": 9.599854227405248e-06,
      "loss": 0.0384,
      "step": 1050
    },
    {
      "epoch": 4.534174553101998,
      "grad_norm": 2.3190319538116455,
      "learning_rate": 9.581632653061226e-06,
      "loss": 0.041,
      "step": 1075
    },
    {
      "epoch": 4.639327024185068,
      "grad_norm": 1.6487782001495361,
      "learning_rate": 9.563411078717202e-06,
      "loss": 0.0373,
      "step": 1100
    },
    {
      "epoch": 4.744479495268139,
      "grad_norm": 1.493482232093811,
      "learning_rate": 9.54518950437318e-06,
      "loss": 0.0453,
      "step": 1125
    },
    {
      "epoch": 4.849631966351209,
      "grad_norm": 1.9630647897720337,
      "learning_rate": 9.526967930029156e-06,
      "loss": 0.0447,
      "step": 1150
    },
    {
      "epoch": 4.95478443743428,
      "grad_norm": 2.1700949668884277,
      "learning_rate": 9.508746355685132e-06,
      "loss": 0.0416,
      "step": 1175
    },
    {
      "epoch": 5.063091482649842,
      "grad_norm": 2.0949151515960693,
      "learning_rate": 9.490524781341108e-06,
      "loss": 0.0296,
      "step": 1200
    },
    {
      "epoch": 5.168243953732913,
      "grad_norm": 1.232323169708252,
      "learning_rate": 9.472303206997086e-06,
      "loss": 0.0227,
      "step": 1225
    },
    {
      "epoch": 5.273396424815983,
      "grad_norm": 6.17132568359375,
      "learning_rate": 9.454081632653062e-06,
      "loss": 0.0284,
      "step": 1250
    },
    {
      "epoch": 5.378548895899054,
      "grad_norm": 0.9878859519958496,
      "learning_rate": 9.43586005830904e-06,
      "loss": 0.0211,
      "step": 1275
    },
    {
      "epoch": 5.483701366982124,
      "grad_norm": 2.1974642276763916,
      "learning_rate": 9.417638483965016e-06,
      "loss": 0.0203,
      "step": 1300
    },
    {
      "epoch": 5.588853838065194,
      "grad_norm": 1.2606751918792725,
      "learning_rate": 9.399416909620992e-06,
      "loss": 0.0271,
      "step": 1325
    },
    {
      "epoch": 5.694006309148265,
      "grad_norm": 1.851982593536377,
      "learning_rate": 9.38119533527697e-06,
      "loss": 0.0271,
      "step": 1350
    },
    {
      "epoch": 5.799158780231336,
      "grad_norm": 1.9509834051132202,
      "learning_rate": 9.362973760932945e-06,
      "loss": 0.0261,
      "step": 1375
    },
    {
      "epoch": 5.904311251314406,
      "grad_norm": 1.3138453960418701,
      "learning_rate": 9.344752186588922e-06,
      "loss": 0.0269,
      "step": 1400
    },
    {
      "epoch": 6.012618296529968,
      "grad_norm": 1.0709035396575928,
      "learning_rate": 9.326530612244898e-06,
      "loss": 0.0292,
      "step": 1425
    },
    {
      "epoch": 6.117770767613039,
      "grad_norm": 0.9309059381484985,
      "learning_rate": 9.308309037900875e-06,
      "loss": 0.0128,
      "step": 1450
    },
    {
      "epoch": 6.22292323869611,
      "grad_norm": 1.078444004058838,
      "learning_rate": 9.290087463556851e-06,
      "loss": 0.0189,
      "step": 1475
    },
    {
      "epoch": 6.3280757097791795,
      "grad_norm": 2.0890743732452393,
      "learning_rate": 9.27186588921283e-06,
      "loss": 0.0184,
      "step": 1500
    },
    {
      "epoch": 6.3280757097791795,
      "eval_loss": 0.10838954150676727,
      "eval_runtime": 1142.8117,
      "eval_samples_per_second": 2.852,
      "eval_steps_per_second": 0.713,
      "eval_wer": 0.11257288805358778,
      "step": 1500
    },
    {
      "epoch": 6.43322818086225,
      "grad_norm": 0.5473038554191589,
      "learning_rate": 9.253644314868805e-06,
      "loss": 0.0153,
      "step": 1525
    },
    {
      "epoch": 6.538380651945321,
      "grad_norm": 1.075249433517456,
      "learning_rate": 9.235422740524781e-06,
      "loss": 0.0172,
      "step": 1550
    },
    {
      "epoch": 6.643533123028391,
      "grad_norm": 1.237701177597046,
      "learning_rate": 9.21720116618076e-06,
      "loss": 0.0167,
      "step": 1575
    },
    {
      "epoch": 6.748685594111461,
      "grad_norm": 1.9220877885818481,
      "learning_rate": 9.198979591836735e-06,
      "loss": 0.0175,
      "step": 1600
    },
    {
      "epoch": 6.853838065194532,
      "grad_norm": 1.435571312904358,
      "learning_rate": 9.180758017492713e-06,
      "loss": 0.0182,
      "step": 1625
    },
    {
      "epoch": 6.958990536277603,
      "grad_norm": 1.9936102628707886,
      "learning_rate": 9.162536443148689e-06,
      "loss": 0.0158,
      "step": 1650
    },
    {
      "epoch": 7.067297581493165,
      "grad_norm": 1.101319670677185,
      "learning_rate": 9.144314868804665e-06,
      "loss": 0.0147,
      "step": 1675
    },
    {
      "epoch": 7.172450052576235,
      "grad_norm": 1.3670756816864014,
      "learning_rate": 9.126093294460641e-06,
      "loss": 0.0127,
      "step": 1700
    },
    {
      "epoch": 7.277602523659306,
      "grad_norm": 0.6474692225456238,
      "learning_rate": 9.107871720116619e-06,
      "loss": 0.0107,
      "step": 1725
    },
    {
      "epoch": 7.382754994742377,
      "grad_norm": 0.941483736038208,
      "learning_rate": 9.089650145772595e-06,
      "loss": 0.0129,
      "step": 1750
    },
    {
      "epoch": 7.4879074658254465,
      "grad_norm": 0.7835807204246521,
      "learning_rate": 9.071428571428573e-06,
      "loss": 0.015,
      "step": 1775
    },
    {
      "epoch": 7.593059936908517,
      "grad_norm": 0.7577527761459351,
      "learning_rate": 9.053206997084549e-06,
      "loss": 0.0119,
      "step": 1800
    },
    {
      "epoch": 7.698212407991588,
      "grad_norm": 1.8214268684387207,
      "learning_rate": 9.034985422740525e-06,
      "loss": 0.0116,
      "step": 1825
    },
    {
      "epoch": 7.803364879074659,
      "grad_norm": 0.8769547343254089,
      "learning_rate": 9.016763848396503e-06,
      "loss": 0.0131,
      "step": 1850
    },
    {
      "epoch": 7.908517350157728,
      "grad_norm": 1.227581262588501,
      "learning_rate": 8.998542274052479e-06,
      "loss": 0.012,
      "step": 1875
    },
    {
      "epoch": 8.016824395373291,
      "grad_norm": 1.28843355178833,
      "learning_rate": 8.980320699708455e-06,
      "loss": 0.0134,
      "step": 1900
    },
    {
      "epoch": 8.121976866456361,
      "grad_norm": 1.5814619064331055,
      "learning_rate": 8.962099125364431e-06,
      "loss": 0.0086,
      "step": 1925
    },
    {
      "epoch": 8.227129337539433,
      "grad_norm": 0.8933718800544739,
      "learning_rate": 8.943877551020409e-06,
      "loss": 0.0089,
      "step": 1950
    },
    {
      "epoch": 8.332281808622502,
      "grad_norm": 0.9096396565437317,
      "learning_rate": 8.925655976676385e-06,
      "loss": 0.0097,
      "step": 1975
    },
    {
      "epoch": 8.437434279705574,
      "grad_norm": 0.42109328508377075,
      "learning_rate": 8.907434402332363e-06,
      "loss": 0.0098,
      "step": 2000
    },
    {
      "epoch": 8.437434279705574,
      "eval_loss": 0.11219166219234467,
      "eval_runtime": 1135.5499,
      "eval_samples_per_second": 2.87,
      "eval_steps_per_second": 0.718,
      "eval_wer": 0.10139270396607393,
      "step": 2000
    },
    {
      "epoch": 8.542586750788644,
      "grad_norm": 1.2390159368515015,
      "learning_rate": 8.889212827988339e-06,
      "loss": 0.0106,
      "step": 2025
    },
    {
      "epoch": 8.647739221871714,
      "grad_norm": 0.9200573563575745,
      "learning_rate": 8.870991253644315e-06,
      "loss": 0.0082,
      "step": 2050
    },
    {
      "epoch": 8.752891692954785,
      "grad_norm": 0.814224362373352,
      "learning_rate": 8.852769679300293e-06,
      "loss": 0.0105,
      "step": 2075
    },
    {
      "epoch": 8.858044164037855,
      "grad_norm": 0.7412490248680115,
      "learning_rate": 8.834548104956269e-06,
      "loss": 0.0101,
      "step": 2100
    },
    {
      "epoch": 8.963196635120925,
      "grad_norm": 0.8766764402389526,
      "learning_rate": 8.816326530612247e-06,
      "loss": 0.0092,
      "step": 2125
    },
    {
      "epoch": 9.071503680336487,
      "grad_norm": 0.09623388946056366,
      "learning_rate": 8.798104956268223e-06,
      "loss": 0.0088,
      "step": 2150
    },
    {
      "epoch": 9.176656151419559,
      "grad_norm": 0.6915100812911987,
      "learning_rate": 8.779883381924199e-06,
      "loss": 0.0073,
      "step": 2175
    },
    {
      "epoch": 9.281808622502629,
      "grad_norm": 0.6448748707771301,
      "learning_rate": 8.761661807580175e-06,
      "loss": 0.0089,
      "step": 2200
    },
    {
      "epoch": 9.386961093585699,
      "grad_norm": 1.2592213153839111,
      "learning_rate": 8.743440233236153e-06,
      "loss": 0.0096,
      "step": 2225
    },
    {
      "epoch": 9.49211356466877,
      "grad_norm": 0.8664531111717224,
      "learning_rate": 8.72521865889213e-06,
      "loss": 0.0073,
      "step": 2250
    },
    {
      "epoch": 9.59726603575184,
      "grad_norm": 0.49285972118377686,
      "learning_rate": 8.706997084548106e-06,
      "loss": 0.0078,
      "step": 2275
    },
    {
      "epoch": 9.70241850683491,
      "grad_norm": 0.9507968425750732,
      "learning_rate": 8.688775510204082e-06,
      "loss": 0.0092,
      "step": 2300
    },
    {
      "epoch": 9.807570977917981,
      "grad_norm": 1.0036101341247559,
      "learning_rate": 8.670553935860059e-06,
      "loss": 0.0103,
      "step": 2325
    },
    {
      "epoch": 9.912723449001051,
      "grad_norm": 0.8855874538421631,
      "learning_rate": 8.653061224489798e-06,
      "loss": 0.0111,
      "step": 2350
    },
    {
      "epoch": 10.021030494216614,
      "grad_norm": 0.918958306312561,
      "learning_rate": 8.634839650145774e-06,
      "loss": 0.0097,
      "step": 2375
    },
    {
      "epoch": 10.126182965299684,
      "grad_norm": 1.442686915397644,
      "learning_rate": 8.61661807580175e-06,
      "loss": 0.0083,
      "step": 2400
    },
    {
      "epoch": 10.231335436382755,
      "grad_norm": 1.1829726696014404,
      "learning_rate": 8.598396501457726e-06,
      "loss": 0.0076,
      "step": 2425
    },
    {
      "epoch": 10.336487907465825,
      "grad_norm": 1.1316958665847778,
      "learning_rate": 8.580174927113704e-06,
      "loss": 0.0086,
      "step": 2450
    },
    {
      "epoch": 10.441640378548897,
      "grad_norm": 0.5993586778640747,
      "learning_rate": 8.56195335276968e-06,
      "loss": 0.0085,
      "step": 2475
    },
    {
      "epoch": 10.546792849631967,
      "grad_norm": 1.1639779806137085,
      "learning_rate": 8.543731778425656e-06,
      "loss": 0.0076,
      "step": 2500
    },
    {
      "epoch": 10.546792849631967,
      "eval_loss": 0.1101309061050415,
      "eval_runtime": 1133.8828,
      "eval_samples_per_second": 2.874,
      "eval_steps_per_second": 0.719,
      "eval_wer": 0.09657365910076623,
      "step": 2500
    },
    {
      "epoch": 10.651945320715036,
      "grad_norm": 1.1275087594985962,
      "learning_rate": 8.525510204081632e-06,
      "loss": 0.008,
      "step": 2525
    },
    {
      "epoch": 10.757097791798108,
      "grad_norm": 1.17903470993042,
      "learning_rate": 8.50728862973761e-06,
      "loss": 0.0082,
      "step": 2550
    },
    {
      "epoch": 10.862250262881178,
      "grad_norm": 0.35520169138908386,
      "learning_rate": 8.489067055393588e-06,
      "loss": 0.0085,
      "step": 2575
    },
    {
      "epoch": 10.967402733964247,
      "grad_norm": 1.5375522375106812,
      "learning_rate": 8.470845481049564e-06,
      "loss": 0.0062,
      "step": 2600
    },
    {
      "epoch": 11.07570977917981,
      "grad_norm": 0.49886855483055115,
      "learning_rate": 8.45262390670554e-06,
      "loss": 0.006,
      "step": 2625
    },
    {
      "epoch": 11.180862250262882,
      "grad_norm": 0.5618315935134888,
      "learning_rate": 8.434402332361516e-06,
      "loss": 0.0053,
      "step": 2650
    },
    {
      "epoch": 11.286014721345952,
      "grad_norm": 0.4988507032394409,
      "learning_rate": 8.416180758017494e-06,
      "loss": 0.006,
      "step": 2675
    },
    {
      "epoch": 11.391167192429021,
      "grad_norm": 0.4689825773239136,
      "learning_rate": 8.39795918367347e-06,
      "loss": 0.0059,
      "step": 2700
    },
    {
      "epoch": 11.496319663512093,
      "grad_norm": 0.637856662273407,
      "learning_rate": 8.379737609329447e-06,
      "loss": 0.0066,
      "step": 2725
    },
    {
      "epoch": 11.601472134595163,
      "grad_norm": 0.46745094656944275,
      "learning_rate": 8.361516034985424e-06,
      "loss": 0.007,
      "step": 2750
    },
    {
      "epoch": 11.706624605678233,
      "grad_norm": 1.153106689453125,
      "learning_rate": 8.3432944606414e-06,
      "loss": 0.0077,
      "step": 2775
    },
    {
      "epoch": 11.811777076761304,
      "grad_norm": 1.1093589067459106,
      "learning_rate": 8.325072886297377e-06,
      "loss": 0.0068,
      "step": 2800
    },
    {
      "epoch": 11.916929547844374,
      "grad_norm": 0.7349961400032043,
      "learning_rate": 8.306851311953353e-06,
      "loss": 0.0075,
      "step": 2825
    },
    {
      "epoch": 12.025236593059937,
      "grad_norm": 3.071120500564575,
      "learning_rate": 8.288629737609331e-06,
      "loss": 0.0067,
      "step": 2850
    },
    {
      "epoch": 12.130389064143007,
      "grad_norm": 1.33828604221344,
      "learning_rate": 8.270408163265307e-06,
      "loss": 0.0049,
      "step": 2875
    },
    {
      "epoch": 12.235541535226078,
      "grad_norm": 0.5961343050003052,
      "learning_rate": 8.252186588921283e-06,
      "loss": 0.0058,
      "step": 2900
    },
    {
      "epoch": 12.340694006309148,
      "grad_norm": 0.5771873593330383,
      "learning_rate": 8.23396501457726e-06,
      "loss": 0.0049,
      "step": 2925
    },
    {
      "epoch": 12.44584647739222,
      "grad_norm": 1.0507683753967285,
      "learning_rate": 8.215743440233237e-06,
      "loss": 0.0065,
      "step": 2950
    },
    {
      "epoch": 12.55099894847529,
      "grad_norm": 0.579768180847168,
      "learning_rate": 8.197521865889213e-06,
      "loss": 0.0058,
      "step": 2975
    },
    {
      "epoch": 12.656151419558359,
      "grad_norm": 0.8235365748405457,
      "learning_rate": 8.17930029154519e-06,
      "loss": 0.0075,
      "step": 3000
    },
    {
      "epoch": 12.656151419558359,
      "eval_loss": 0.11558376252651215,
      "eval_runtime": 1148.2367,
      "eval_samples_per_second": 2.838,
      "eval_steps_per_second": 0.71,
      "eval_wer": 0.09917594332803238,
      "step": 3000
    },
    {
      "epoch": 12.656151419558359,
      "step": 3000,
      "total_flos": 5.55469178535936e+19,
      "train_loss": 0.17092564110457897,
      "train_runtime": 92948.5763,
      "train_samples_per_second": 9.82,
      "train_steps_per_second": 0.153
    }
  ],
  "logging_steps": 25,
  "max_steps": 14220,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 60,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 3,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 3
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 5.55469178535936e+19,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}
|
|