{
  "best_global_step": 1000,
  "best_metric": 35.75192541548439,
  "best_model_checkpoint": "./whisper-large-v2/In_house_data/checkpoint-1000",
  "epoch": 2.7482806052269604,
  "eval_steps": 500,
  "global_step": 1000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.013755158184319119,
      "grad_norm": 21.1955509185791,
      "learning_rate": 2.666666666666667e-07,
      "loss": 3.905,
      "step": 5
    },
    {
      "epoch": 0.027510316368638238,
      "grad_norm": 11.807918548583984,
      "learning_rate": 6.000000000000001e-07,
      "loss": 3.6009,
      "step": 10
    },
    {
      "epoch": 0.04126547455295736,
      "grad_norm": NaN,
      "learning_rate": 9.333333333333334e-07,
      "loss": 3.5008,
      "step": 15
    },
    {
      "epoch": 0.055020632737276476,
      "grad_norm": 12.797011375427246,
      "learning_rate": 1.2000000000000002e-06,
      "loss": 3.2938,
      "step": 20
    },
    {
      "epoch": 0.0687757909215956,
      "grad_norm": 11.184866905212402,
      "learning_rate": 1.5333333333333334e-06,
      "loss": 2.9611,
      "step": 25
    },
    {
      "epoch": 0.08253094910591471,
      "grad_norm": 12.54674243927002,
      "learning_rate": 1.8666666666666669e-06,
      "loss": 2.5185,
      "step": 30
    },
    {
      "epoch": 0.09628610729023383,
      "grad_norm": 6.0443196296691895,
      "learning_rate": 2.2e-06,
      "loss": 2.433,
      "step": 35
    },
    {
      "epoch": 0.11004126547455295,
      "grad_norm": 5.060392379760742,
      "learning_rate": 2.5333333333333338e-06,
      "loss": 2.1781,
      "step": 40
    },
    {
      "epoch": 0.12379642365887207,
      "grad_norm": 4.572280406951904,
      "learning_rate": 2.8000000000000003e-06,
      "loss": 2.0818,
      "step": 45
    },
    {
      "epoch": 0.1375515818431912,
      "grad_norm": 5.422489643096924,
      "learning_rate": 3.133333333333334e-06,
      "loss": 2.1037,
      "step": 50
    },
    {
      "epoch": 0.15130674002751032,
      "grad_norm": 4.844382286071777,
      "learning_rate": 3.4666666666666672e-06,
      "loss": 1.9358,
      "step": 55
    },
    {
      "epoch": 0.16506189821182943,
      "grad_norm": 9.222413063049316,
      "learning_rate": 3.8000000000000005e-06,
      "loss": 1.8682,
      "step": 60
    },
    {
      "epoch": 0.17881705639614856,
      "grad_norm": 4.705779075622559,
      "learning_rate": 4.133333333333333e-06,
      "loss": 1.6894,
      "step": 65
    },
    {
      "epoch": 0.19257221458046767,
      "grad_norm": 9.01275634765625,
      "learning_rate": 4.4666666666666665e-06,
      "loss": 1.4863,
      "step": 70
    },
    {
      "epoch": 0.2063273727647868,
      "grad_norm": 21.184419631958008,
      "learning_rate": 4.7333333333333335e-06,
      "loss": 1.4149,
      "step": 75
    },
    {
      "epoch": 0.2200825309491059,
      "grad_norm": 17.06338119506836,
      "learning_rate": 5.0666666666666676e-06,
      "loss": 1.3539,
      "step": 80
    },
    {
      "epoch": 0.23383768913342504,
      "grad_norm": 4.059953689575195,
      "learning_rate": 5.400000000000001e-06,
      "loss": 1.3184,
      "step": 85
    },
    {
      "epoch": 0.24759284731774414,
      "grad_norm": 3.739663600921631,
      "learning_rate": 5.733333333333334e-06,
      "loss": 1.2847,
      "step": 90
    },
    {
      "epoch": 0.2613480055020633,
      "grad_norm": 6.212688446044922,
      "learning_rate": 6.066666666666667e-06,
      "loss": 1.2833,
      "step": 95
    },
    {
      "epoch": 0.2751031636863824,
      "grad_norm": 7.45237922668457,
      "learning_rate": 6.4000000000000006e-06,
      "loss": 1.2058,
      "step": 100
    },
    {
      "epoch": 0.28885832187070154,
      "grad_norm": 3.6827175617218018,
      "learning_rate": 6.733333333333334e-06,
      "loss": 1.1704,
      "step": 105
    },
    {
      "epoch": 0.30261348005502064,
      "grad_norm": 3.8309476375579834,
      "learning_rate": 7.066666666666667e-06,
      "loss": 1.1297,
      "step": 110
    },
    {
      "epoch": 0.31636863823933975,
      "grad_norm": 4.041064739227295,
      "learning_rate": 7.4e-06,
      "loss": 1.1995,
      "step": 115
    },
    {
      "epoch": 0.33012379642365886,
      "grad_norm": 4.068305969238281,
      "learning_rate": 7.733333333333334e-06,
      "loss": 1.176,
      "step": 120
    },
    {
      "epoch": 0.343878954607978,
      "grad_norm": 11.103217124938965,
      "learning_rate": 8.066666666666667e-06,
      "loss": 1.1701,
      "step": 125
    },
    {
      "epoch": 0.3576341127922971,
      "grad_norm": 3.5990092754364014,
      "learning_rate": 8.400000000000001e-06,
      "loss": 1.0819,
      "step": 130
    },
    {
      "epoch": 0.3713892709766162,
      "grad_norm": 4.472628593444824,
      "learning_rate": 8.733333333333333e-06,
      "loss": 1.1597,
      "step": 135
    },
    {
      "epoch": 0.38514442916093533,
      "grad_norm": 4.027596473693848,
      "learning_rate": 9.066666666666667e-06,
      "loss": 1.1287,
      "step": 140
    },
    {
      "epoch": 0.3988995873452545,
      "grad_norm": 3.919308662414551,
      "learning_rate": 9.4e-06,
      "loss": 1.1378,
      "step": 145
    },
    {
      "epoch": 0.4126547455295736,
      "grad_norm": 3.2769739627838135,
      "learning_rate": 9.733333333333334e-06,
      "loss": 1.0651,
      "step": 150
    },
    {
      "epoch": 0.4264099037138927,
      "grad_norm": 3.678276300430298,
      "learning_rate": 9.997701149425289e-06,
      "loss": 1.0233,
      "step": 155
    },
    {
      "epoch": 0.4401650618982118,
      "grad_norm": 3.9921329021453857,
      "learning_rate": 9.986206896551724e-06,
      "loss": 0.9842,
      "step": 160
    },
    {
      "epoch": 0.45392022008253097,
      "grad_norm": 4.81342077255249,
      "learning_rate": 9.974712643678162e-06,
      "loss": 0.9774,
      "step": 165
    },
    {
      "epoch": 0.4676753782668501,
      "grad_norm": 3.1831002235412598,
      "learning_rate": 9.963218390804599e-06,
      "loss": 0.9691,
      "step": 170
    },
    {
      "epoch": 0.4814305364511692,
      "grad_norm": 3.0080406665802,
      "learning_rate": 9.951724137931035e-06,
      "loss": 0.8912,
      "step": 175
    },
    {
      "epoch": 0.4951856946354883,
      "grad_norm": 4.207084655761719,
      "learning_rate": 9.940229885057472e-06,
      "loss": 0.9708,
      "step": 180
    },
    {
      "epoch": 0.5089408528198074,
      "grad_norm": 3.468383550643921,
      "learning_rate": 9.928735632183909e-06,
      "loss": 0.8915,
      "step": 185
    },
    {
      "epoch": 0.5226960110041265,
      "grad_norm": 3.9594309329986572,
      "learning_rate": 9.917241379310347e-06,
      "loss": 0.9415,
      "step": 190
    },
    {
      "epoch": 0.5364511691884457,
      "grad_norm": 3.3678927421569824,
      "learning_rate": 9.905747126436782e-06,
      "loss": 0.8949,
      "step": 195
    },
    {
      "epoch": 0.5502063273727648,
      "grad_norm": 3.1255943775177,
      "learning_rate": 9.89425287356322e-06,
      "loss": 0.9411,
      "step": 200
    },
    {
      "epoch": 0.5639614855570839,
      "grad_norm": 3.340153694152832,
      "learning_rate": 9.882758620689657e-06,
      "loss": 0.9468,
      "step": 205
    },
    {
      "epoch": 0.5777166437414031,
      "grad_norm": 2.989037275314331,
      "learning_rate": 9.871264367816093e-06,
      "loss": 0.8987,
      "step": 210
    },
    {
      "epoch": 0.5914718019257221,
      "grad_norm": 3.296373128890991,
      "learning_rate": 9.85977011494253e-06,
      "loss": 0.9369,
      "step": 215
    },
    {
      "epoch": 0.6052269601100413,
      "grad_norm": 4.3851189613342285,
      "learning_rate": 9.848275862068966e-06,
      "loss": 0.8394,
      "step": 220
    },
    {
      "epoch": 0.6189821182943603,
      "grad_norm": 3.8930516242980957,
      "learning_rate": 9.836781609195403e-06,
      "loss": 0.7941,
      "step": 225
    },
    {
      "epoch": 0.6327372764786795,
      "grad_norm": 3.3190417289733887,
      "learning_rate": 9.82528735632184e-06,
      "loss": 0.9271,
      "step": 230
    },
    {
      "epoch": 0.6464924346629987,
      "grad_norm": 2.888249397277832,
      "learning_rate": 9.813793103448276e-06,
      "loss": 0.8505,
      "step": 235
    },
    {
      "epoch": 0.6602475928473177,
      "grad_norm": 2.989713668823242,
      "learning_rate": 9.802298850574713e-06,
      "loss": 0.833,
      "step": 240
    },
    {
      "epoch": 0.6740027510316369,
      "grad_norm": 3.96846866607666,
      "learning_rate": 9.79080459770115e-06,
      "loss": 0.8411,
      "step": 245
    },
    {
      "epoch": 0.687757909215956,
      "grad_norm": 2.885392665863037,
      "learning_rate": 9.779310344827588e-06,
      "loss": 0.8882,
      "step": 250
    },
    {
      "epoch": 0.7015130674002751,
      "grad_norm": 3.2487101554870605,
      "learning_rate": 9.767816091954022e-06,
      "loss": 0.8684,
      "step": 255
    },
    {
      "epoch": 0.7152682255845942,
      "grad_norm": 3.253427505493164,
      "learning_rate": 9.75632183908046e-06,
      "loss": 0.8147,
      "step": 260
    },
    {
      "epoch": 0.7290233837689133,
      "grad_norm": 6.355091094970703,
      "learning_rate": 9.744827586206897e-06,
      "loss": 0.7958,
      "step": 265
    },
    {
      "epoch": 0.7427785419532325,
      "grad_norm": 4.210458278656006,
      "learning_rate": 9.733333333333334e-06,
      "loss": 0.7896,
      "step": 270
    },
    {
      "epoch": 0.7565337001375516,
      "grad_norm": 3.744110584259033,
      "learning_rate": 9.72183908045977e-06,
      "loss": 0.8067,
      "step": 275
    },
    {
      "epoch": 0.7702888583218707,
      "grad_norm": 3.912106513977051,
      "learning_rate": 9.710344827586207e-06,
      "loss": 0.8419,
      "step": 280
    },
    {
      "epoch": 0.7840440165061898,
      "grad_norm": 10.62960433959961,
      "learning_rate": 9.698850574712645e-06,
      "loss": 0.7707,
      "step": 285
    },
    {
      "epoch": 0.797799174690509,
      "grad_norm": 3.216627836227417,
      "learning_rate": 9.68735632183908e-06,
      "loss": 0.8188,
      "step": 290
    },
    {
      "epoch": 0.811554332874828,
      "grad_norm": 16.345537185668945,
      "learning_rate": 9.675862068965518e-06,
      "loss": 0.7763,
      "step": 295
    },
    {
      "epoch": 0.8253094910591472,
      "grad_norm": 3.217355489730835,
      "learning_rate": 9.664367816091955e-06,
      "loss": 0.8194,
      "step": 300
    },
    {
      "epoch": 0.8390646492434664,
      "grad_norm": 3.4968042373657227,
      "learning_rate": 9.652873563218392e-06,
      "loss": 0.8462,
      "step": 305
    },
    {
      "epoch": 0.8528198074277854,
      "grad_norm": 3.1585259437561035,
      "learning_rate": 9.641379310344828e-06,
      "loss": 0.7598,
      "step": 310
    },
    {
      "epoch": 0.8665749656121046,
      "grad_norm": 2.978710651397705,
      "learning_rate": 9.629885057471265e-06,
      "loss": 0.788,
      "step": 315
    },
    {
      "epoch": 0.8803301237964236,
      "grad_norm": 3.119516134262085,
      "learning_rate": 9.618390804597701e-06,
      "loss": 0.761,
      "step": 320
    },
    {
      "epoch": 0.8940852819807428,
      "grad_norm": 2.88628888130188,
      "learning_rate": 9.606896551724138e-06,
      "loss": 0.7441,
      "step": 325
    },
    {
      "epoch": 0.9078404401650619,
      "grad_norm": 2.832181453704834,
      "learning_rate": 9.595402298850576e-06,
      "loss": 0.7509,
      "step": 330
    },
    {
      "epoch": 0.921595598349381,
      "grad_norm": 3.500303268432617,
      "learning_rate": 9.583908045977011e-06,
      "loss": 0.7589,
      "step": 335
    },
    {
      "epoch": 0.9353507565337001,
      "grad_norm": 2.673982858657837,
      "learning_rate": 9.57241379310345e-06,
      "loss": 0.7677,
      "step": 340
    },
    {
      "epoch": 0.9491059147180193,
      "grad_norm": 3.101490020751953,
      "learning_rate": 9.560919540229886e-06,
      "loss": 0.796,
      "step": 345
    },
    {
      "epoch": 0.9628610729023384,
      "grad_norm": 2.6055097579956055,
      "learning_rate": 9.549425287356323e-06,
      "loss": 0.75,
      "step": 350
    },
    {
      "epoch": 0.9766162310866575,
      "grad_norm": 4.560893535614014,
      "learning_rate": 9.537931034482759e-06,
      "loss": 0.7543,
      "step": 355
    },
    {
      "epoch": 0.9903713892709766,
      "grad_norm": 2.834599733352661,
      "learning_rate": 9.526436781609196e-06,
      "loss": 0.7355,
      "step": 360
    },
    {
      "epoch": 1.0027510316368637,
      "grad_norm": 3.335203170776367,
      "learning_rate": 9.514942528735634e-06,
      "loss": 0.6308,
      "step": 365
    },
    {
      "epoch": 1.016506189821183,
      "grad_norm": 2.8525352478027344,
      "learning_rate": 9.503448275862069e-06,
      "loss": 0.6704,
      "step": 370
    },
    {
      "epoch": 1.030261348005502,
      "grad_norm": 3.6981310844421387,
      "learning_rate": 9.491954022988507e-06,
      "loss": 0.6066,
      "step": 375
    },
    {
      "epoch": 1.0440165061898212,
      "grad_norm": 2.5168659687042236,
      "learning_rate": 9.480459770114944e-06,
      "loss": 0.6544,
      "step": 380
    },
    {
      "epoch": 1.0577716643741404,
      "grad_norm": 3.087484836578369,
      "learning_rate": 9.46896551724138e-06,
      "loss": 0.6235,
      "step": 385
    },
    {
      "epoch": 1.0715268225584593,
      "grad_norm": 2.891552448272705,
      "learning_rate": 9.457471264367817e-06,
      "loss": 0.5911,
      "step": 390
    },
    {
      "epoch": 1.0852819807427785,
      "grad_norm": 3.162491798400879,
      "learning_rate": 9.445977011494253e-06,
      "loss": 0.6187,
      "step": 395
    },
    {
      "epoch": 1.0990371389270976,
      "grad_norm": 2.9897255897521973,
      "learning_rate": 9.43448275862069e-06,
      "loss": 0.622,
      "step": 400
    },
    {
      "epoch": 1.1127922971114168,
      "grad_norm": 2.488380193710327,
      "learning_rate": 9.422988505747127e-06,
      "loss": 0.5951,
      "step": 405
    },
    {
      "epoch": 1.126547455295736,
      "grad_norm": 4.275421619415283,
      "learning_rate": 9.411494252873565e-06,
      "loss": 0.5707,
      "step": 410
    },
    {
      "epoch": 1.140302613480055,
      "grad_norm": 2.8604931831359863,
      "learning_rate": 9.4e-06,
      "loss": 0.5814,
      "step": 415
    },
    {
      "epoch": 1.154057771664374,
      "grad_norm": 3.4569485187530518,
      "learning_rate": 9.388505747126438e-06,
      "loss": 0.6335,
      "step": 420
    },
    {
      "epoch": 1.1678129298486932,
      "grad_norm": 3.236696481704712,
      "learning_rate": 9.377011494252875e-06,
      "loss": 0.5976,
      "step": 425
    },
    {
      "epoch": 1.1815680880330124,
      "grad_norm": 2.5323238372802734,
      "learning_rate": 9.365517241379311e-06,
      "loss": 0.5809,
      "step": 430
    },
    {
      "epoch": 1.1953232462173315,
      "grad_norm": 3.0921990871429443,
      "learning_rate": 9.354022988505748e-06,
      "loss": 0.5672,
      "step": 435
    },
    {
      "epoch": 1.2090784044016507,
      "grad_norm": 2.991499185562134,
      "learning_rate": 9.342528735632184e-06,
      "loss": 0.6058,
      "step": 440
    },
    {
      "epoch": 1.2228335625859696,
      "grad_norm": 2.8206140995025635,
      "learning_rate": 9.331034482758623e-06,
      "loss": 0.641,
      "step": 445
    },
    {
      "epoch": 1.2365887207702888,
      "grad_norm": 2.5404281616210938,
      "learning_rate": 9.319540229885058e-06,
      "loss": 0.5627,
      "step": 450
    },
    {
      "epoch": 1.250343878954608,
      "grad_norm": 2.977123737335205,
      "learning_rate": 9.308045977011496e-06,
      "loss": 0.5843,
      "step": 455
    },
    {
      "epoch": 1.2640990371389271,
      "grad_norm": 3.053406000137329,
      "learning_rate": 9.296551724137932e-06,
      "loss": 0.6054,
      "step": 460
    },
    {
      "epoch": 1.2778541953232463,
      "grad_norm": 2.8271589279174805,
      "learning_rate": 9.285057471264369e-06,
      "loss": 0.623,
      "step": 465
    },
    {
      "epoch": 1.2916093535075652,
      "grad_norm": 2.7028932571411133,
      "learning_rate": 9.273563218390806e-06,
      "loss": 0.5977,
      "step": 470
    },
    {
      "epoch": 1.3053645116918844,
      "grad_norm": 2.9352943897247314,
      "learning_rate": 9.262068965517242e-06,
      "loss": 0.517,
      "step": 475
    },
    {
      "epoch": 1.3191196698762035,
      "grad_norm": 2.8113648891448975,
      "learning_rate": 9.250574712643679e-06,
      "loss": 0.5724,
      "step": 480
    },
    {
      "epoch": 1.3328748280605227,
      "grad_norm": 2.876746892929077,
      "learning_rate": 9.239080459770115e-06,
      "loss": 0.5633,
      "step": 485
    },
    {
      "epoch": 1.3466299862448419,
      "grad_norm": 3.2946653366088867,
      "learning_rate": 9.227586206896552e-06,
      "loss": 0.6288,
      "step": 490
    },
    {
      "epoch": 1.360385144429161,
      "grad_norm": 3.5094716548919678,
      "learning_rate": 9.216091954022988e-06,
      "loss": 0.5127,
      "step": 495
    },
    {
      "epoch": 1.3741403026134802,
      "grad_norm": 2.5474300384521484,
      "learning_rate": 9.204597701149425e-06,
      "loss": 0.5787,
      "step": 500
    },
    {
      "epoch": 1.3741403026134802,
      "eval_bleu": 0.4206719346931395,
      "eval_cer": 18.365245374094933,
      "eval_loss": 0.7084596753120422,
      "eval_runtime": 733.6947,
      "eval_samples_per_second": 0.664,
      "eval_steps_per_second": 0.333,
      "eval_wer": 41.43696797730036,
      "step": 500
    },
    {
      "epoch": 1.3878954607977991,
      "grad_norm": 2.845057249069214,
      "learning_rate": 9.193103448275863e-06,
      "loss": 0.5554,
      "step": 505
    },
    {
      "epoch": 1.4016506189821183,
      "grad_norm": 3.106311082839966,
      "learning_rate": 9.1816091954023e-06,
      "loss": 0.5587,
      "step": 510
    },
    {
      "epoch": 1.4154057771664375,
      "grad_norm": 3.01326322555542,
      "learning_rate": 9.170114942528736e-06,
      "loss": 0.5574,
      "step": 515
    },
    {
      "epoch": 1.4291609353507566,
      "grad_norm": 2.919343948364258,
      "learning_rate": 9.158620689655173e-06,
      "loss": 0.5855,
      "step": 520
    },
    {
      "epoch": 1.4429160935350756,
      "grad_norm": 2.971627712249756,
      "learning_rate": 9.14712643678161e-06,
      "loss": 0.5749,
      "step": 525
    },
    {
      "epoch": 1.4566712517193947,
      "grad_norm": 12.000533103942871,
      "learning_rate": 9.135632183908046e-06,
      "loss": 0.5129,
      "step": 530
    },
    {
      "epoch": 1.4704264099037139,
      "grad_norm": 2.927503824234009,
      "learning_rate": 9.124137931034483e-06,
      "loss": 0.5323,
      "step": 535
    },
    {
      "epoch": 1.484181568088033,
      "grad_norm": 5.160599708557129,
      "learning_rate": 9.112643678160921e-06,
      "loss": 0.5212,
      "step": 540
    },
    {
      "epoch": 1.4979367262723522,
      "grad_norm": 2.3872811794281006,
      "learning_rate": 9.101149425287356e-06,
      "loss": 0.5705,
      "step": 545
    },
    {
      "epoch": 1.5116918844566714,
      "grad_norm": 2.8588502407073975,
      "learning_rate": 9.089655172413794e-06,
      "loss": 0.5803,
      "step": 550
    },
    {
      "epoch": 1.5254470426409905,
      "grad_norm": 2.890172004699707,
      "learning_rate": 9.07816091954023e-06,
      "loss": 0.5466,
      "step": 555
    },
    {
      "epoch": 1.5392022008253095,
      "grad_norm": 2.831617593765259,
      "learning_rate": 9.066666666666667e-06,
      "loss": 0.5848,
      "step": 560
    },
    {
      "epoch": 1.5529573590096286,
      "grad_norm": 2.6025524139404297,
      "learning_rate": 9.055172413793104e-06,
      "loss": 0.5126,
      "step": 565
    },
    {
      "epoch": 1.5667125171939478,
      "grad_norm": 3.2757222652435303,
      "learning_rate": 9.04367816091954e-06,
      "loss": 0.5366,
      "step": 570
    },
    {
      "epoch": 1.5804676753782667,
      "grad_norm": 2.5900487899780273,
      "learning_rate": 9.032183908045977e-06,
      "loss": 0.5046,
      "step": 575
    },
    {
      "epoch": 1.5942228335625859,
      "grad_norm": 2.6956920623779297,
      "learning_rate": 9.020689655172414e-06,
      "loss": 0.5223,
      "step": 580
    },
    {
      "epoch": 1.607977991746905,
      "grad_norm": 3.1659083366394043,
      "learning_rate": 9.009195402298852e-06,
      "loss": 0.5554,
      "step": 585
    },
    {
      "epoch": 1.6217331499312242,
      "grad_norm": 2.5289273262023926,
      "learning_rate": 8.997701149425289e-06,
      "loss": 0.508,
      "step": 590
    },
    {
      "epoch": 1.6354883081155434,
      "grad_norm": 2.5216193199157715,
      "learning_rate": 8.986206896551725e-06,
      "loss": 0.4941,
      "step": 595
    },
    {
      "epoch": 1.6492434662998625,
      "grad_norm": 2.9627370834350586,
      "learning_rate": 8.974712643678162e-06,
      "loss": 0.5181,
      "step": 600
    },
    {
      "epoch": 1.6629986244841817,
      "grad_norm": 2.7418017387390137,
      "learning_rate": 8.963218390804598e-06,
      "loss": 0.5704,
      "step": 605
    },
    {
      "epoch": 1.6767537826685008,
      "grad_norm": 2.909184694290161,
      "learning_rate": 8.951724137931035e-06,
      "loss": 0.5253,
      "step": 610
    },
    {
      "epoch": 1.6905089408528198,
      "grad_norm": 3.2147293090820312,
      "learning_rate": 8.940229885057471e-06,
      "loss": 0.5127,
      "step": 615
    },
    {
      "epoch": 1.704264099037139,
      "grad_norm": 2.9844889640808105,
      "learning_rate": 8.92873563218391e-06,
      "loss": 0.5295,
      "step": 620
    },
    {
      "epoch": 1.718019257221458,
      "grad_norm": 2.8547401428222656,
      "learning_rate": 8.917241379310345e-06,
      "loss": 0.5191,
      "step": 625
    },
    {
      "epoch": 1.731774415405777,
      "grad_norm": 2.84828782081604,
      "learning_rate": 8.905747126436783e-06,
      "loss": 0.517,
      "step": 630
    },
    {
      "epoch": 1.7455295735900962,
      "grad_norm": 2.946942090988159,
      "learning_rate": 8.89425287356322e-06,
      "loss": 0.5044,
      "step": 635
    },
    {
      "epoch": 1.7592847317744154,
      "grad_norm": 2.5053775310516357,
      "learning_rate": 8.882758620689656e-06,
      "loss": 0.4814,
      "step": 640
    },
    {
      "epoch": 1.7730398899587345,
      "grad_norm": 3.0368754863739014,
      "learning_rate": 8.871264367816093e-06,
      "loss": 0.5065,
      "step": 645
    },
    {
      "epoch": 1.7867950481430537,
      "grad_norm": 2.697561264038086,
      "learning_rate": 8.85977011494253e-06,
      "loss": 0.5209,
      "step": 650
    },
    {
      "epoch": 1.8005502063273728,
      "grad_norm": 2.584867238998413,
      "learning_rate": 8.848275862068966e-06,
      "loss": 0.4833,
      "step": 655
    },
    {
      "epoch": 1.814305364511692,
      "grad_norm": 3.0252091884613037,
      "learning_rate": 8.836781609195402e-06,
      "loss": 0.4799,
      "step": 660
    },
    {
      "epoch": 1.8280605226960112,
      "grad_norm": 2.6939878463745117,
      "learning_rate": 8.82528735632184e-06,
      "loss": 0.4721,
      "step": 665
    },
    {
      "epoch": 1.84181568088033,
      "grad_norm": 2.83384108543396,
      "learning_rate": 8.813793103448277e-06,
      "loss": 0.471,
      "step": 670
    },
    {
      "epoch": 1.8555708390646493,
      "grad_norm": 2.3436243534088135,
      "learning_rate": 8.802298850574714e-06,
      "loss": 0.5411,
      "step": 675
    },
    {
      "epoch": 1.8693259972489684,
      "grad_norm": 3.018484115600586,
      "learning_rate": 8.79080459770115e-06,
      "loss": 0.4651,
      "step": 680
    },
    {
      "epoch": 1.8830811554332874,
      "grad_norm": 3.808323860168457,
      "learning_rate": 8.779310344827587e-06,
      "loss": 0.4797,
      "step": 685
    },
    {
      "epoch": 1.8968363136176065,
      "grad_norm": 2.987705945968628,
      "learning_rate": 8.767816091954024e-06,
      "loss": 0.4793,
      "step": 690
    },
    {
      "epoch": 1.9105914718019257,
      "grad_norm": 2.7502503395080566,
      "learning_rate": 8.75632183908046e-06,
      "loss": 0.4851,
      "step": 695
    },
    {
      "epoch": 1.9243466299862448,
      "grad_norm": 2.606917381286621,
      "learning_rate": 8.744827586206898e-06,
      "loss": 0.5304,
      "step": 700
    },
    {
      "epoch": 1.938101788170564,
      "grad_norm": 2.6112170219421387,
      "learning_rate": 8.733333333333333e-06,
      "loss": 0.4538,
      "step": 705
    },
    {
      "epoch": 1.9518569463548832,
      "grad_norm": 2.6846561431884766,
      "learning_rate": 8.721839080459772e-06,
      "loss": 0.4557,
      "step": 710
    },
    {
      "epoch": 1.9656121045392023,
      "grad_norm": 2.7344939708709717,
      "learning_rate": 8.710344827586208e-06,
      "loss": 0.4682,
      "step": 715
    },
    {
      "epoch": 1.9793672627235215,
      "grad_norm": 2.805279493331909,
      "learning_rate": 8.698850574712645e-06,
      "loss": 0.4505,
      "step": 720
    },
    {
      "epoch": 1.9931224209078404,
      "grad_norm": 3.22121000289917,
      "learning_rate": 8.687356321839081e-06,
      "loss": 0.4693,
      "step": 725
    },
    {
      "epoch": 2.0055020632737275,
      "grad_norm": 2.3558602333068848,
      "learning_rate": 8.675862068965518e-06,
      "loss": 0.3819,
      "step": 730
    },
    {
      "epoch": 2.0192572214580466,
      "grad_norm": 2.5739705562591553,
      "learning_rate": 8.664367816091954e-06,
      "loss": 0.3373,
      "step": 735
    },
    {
      "epoch": 2.033012379642366,
      "grad_norm": 3.0432779788970947,
      "learning_rate": 8.652873563218391e-06,
      "loss": 0.3862,
      "step": 740
    },
    {
      "epoch": 2.046767537826685,
      "grad_norm": 2.4599671363830566,
      "learning_rate": 8.641379310344828e-06,
      "loss": 0.3276,
      "step": 745
    },
    {
      "epoch": 2.060522696011004,
      "grad_norm": 2.53336501121521,
      "learning_rate": 8.629885057471266e-06,
      "loss": 0.3393,
      "step": 750
    },
    {
      "epoch": 2.0742778541953233,
      "grad_norm": 2.4407801628112793,
      "learning_rate": 8.6183908045977e-06,
      "loss": 0.3815,
      "step": 755
    },
    {
      "epoch": 2.0880330123796425,
      "grad_norm": 3.65480899810791,
      "learning_rate": 8.606896551724139e-06,
      "loss": 0.3488,
      "step": 760
    },
    {
      "epoch": 2.1017881705639616,
      "grad_norm": 21.605276107788086,
      "learning_rate": 8.595402298850576e-06,
      "loss": 0.358,
      "step": 765
    },
    {
      "epoch": 2.1155433287482808,
      "grad_norm": 3.20198917388916,
      "learning_rate": 8.583908045977012e-06,
      "loss": 0.3401,
      "step": 770
    },
    {
      "epoch": 2.1292984869326,
      "grad_norm": 13.919597625732422,
      "learning_rate": 8.572413793103449e-06,
      "loss": 0.3487,
      "step": 775
    },
    {
      "epoch": 2.1430536451169186,
      "grad_norm": 2.4645321369171143,
      "learning_rate": 8.560919540229885e-06,
      "loss": 0.334,
      "step": 780
    },
    {
      "epoch": 2.156808803301238,
      "grad_norm": 3.1687004566192627,
      "learning_rate": 8.549425287356322e-06,
      "loss": 0.3467,
      "step": 785
    },
    {
      "epoch": 2.170563961485557,
      "grad_norm": 11.212874412536621,
      "learning_rate": 8.537931034482759e-06,
      "loss": 0.3275,
      "step": 790
    },
    {
      "epoch": 2.184319119669876,
      "grad_norm": 2.719883441925049,
      "learning_rate": 8.526436781609197e-06,
      "loss": 0.3548,
      "step": 795
    },
    {
      "epoch": 2.1980742778541953,
      "grad_norm": 2.6431100368499756,
      "learning_rate": 8.514942528735632e-06,
      "loss": 0.3257,
      "step": 800
    },
    {
      "epoch": 2.2118294360385145,
      "grad_norm": 2.410521984100342,
      "learning_rate": 8.50344827586207e-06,
      "loss": 0.3428,
      "step": 805
    },
    {
      "epoch": 2.2255845942228336,
      "grad_norm": 3.244124174118042,
      "learning_rate": 8.491954022988507e-06,
      "loss": 0.3311,
      "step": 810
    },
    {
      "epoch": 2.2393397524071528,
      "grad_norm": 3.0431458950042725,
      "learning_rate": 8.480459770114943e-06,
      "loss": 0.3684,
      "step": 815
    },
    {
      "epoch": 2.253094910591472,
      "grad_norm": 2.8572866916656494,
      "learning_rate": 8.46896551724138e-06,
      "loss": 0.359,
      "step": 820
    },
    {
      "epoch": 2.266850068775791,
      "grad_norm": 2.5997812747955322,
      "learning_rate": 8.457471264367816e-06,
      "loss": 0.3391,
      "step": 825
    },
    {
      "epoch": 2.28060522696011,
      "grad_norm": 2.5643837451934814,
      "learning_rate": 8.445977011494255e-06,
      "loss": 0.3347,
      "step": 830
    },
    {
      "epoch": 2.294360385144429,
      "grad_norm": 2.5857532024383545,
      "learning_rate": 8.43448275862069e-06,
      "loss": 0.3346,
      "step": 835
    },
    {
      "epoch": 2.308115543328748,
      "grad_norm": 2.9407107830047607,
      "learning_rate": 8.422988505747128e-06,
      "loss": 0.3213,
      "step": 840
    },
    {
      "epoch": 2.3218707015130673,
      "grad_norm": 2.9194135665893555,
      "learning_rate": 8.411494252873564e-06,
      "loss": 0.2954,
      "step": 845
    },
    {
      "epoch": 2.3356258596973865,
      "grad_norm": 2.4436566829681396,
      "learning_rate": 8.400000000000001e-06,
      "loss": 0.343,
      "step": 850
    },
    {
      "epoch": 2.3493810178817056,
      "grad_norm": 2.485232353210449,
      "learning_rate": 8.388505747126437e-06,
      "loss": 0.3565,
      "step": 855
    },
    {
      "epoch": 2.3631361760660248,
      "grad_norm": 2.972649335861206,
      "learning_rate": 8.377011494252874e-06,
      "loss": 0.3461,
      "step": 860
    },
    {
      "epoch": 2.376891334250344,
      "grad_norm": 2.566713571548462,
      "learning_rate": 8.36551724137931e-06,
      "loss": 0.3372,
      "step": 865
    },
    {
      "epoch": 2.390646492434663,
      "grad_norm": 2.9575984477996826,
      "learning_rate": 8.354022988505747e-06,
      "loss": 0.3427,
      "step": 870
    },
    {
      "epoch": 2.4044016506189823,
      "grad_norm": 2.6989376544952393,
      "learning_rate": 8.342528735632185e-06,
      "loss": 0.3391,
      "step": 875
    },
    {
      "epoch": 2.4181568088033014,
      "grad_norm": 2.6465554237365723,
      "learning_rate": 8.33103448275862e-06,
      "loss": 0.3128,
      "step": 880
    },
    {
      "epoch": 2.43191196698762,
      "grad_norm": 2.6031441688537598,
      "learning_rate": 8.319540229885059e-06,
      "loss": 0.3227,
      "step": 885
    },
    {
      "epoch": 2.4456671251719393,
      "grad_norm": 2.6808745861053467,
      "learning_rate": 8.308045977011495e-06,
      "loss": 0.3072,
      "step": 890
    },
    {
      "epoch": 2.4594222833562585,
      "grad_norm": 2.9904232025146484,
      "learning_rate": 8.296551724137932e-06,
      "loss": 0.3265,
      "step": 895
    },
    {
      "epoch": 2.4731774415405776,
      "grad_norm": 2.7612838745117188,
      "learning_rate": 8.285057471264368e-06,
      "loss": 0.3245,
      "step": 900
    },
    {
      "epoch": 2.4869325997248968,
      "grad_norm": 2.7029314041137695,
      "learning_rate": 8.273563218390805e-06,
      "loss": 0.3273,
      "step": 905
    },
    {
      "epoch": 2.500687757909216,
      "grad_norm": 2.87737774848938,
      "learning_rate": 8.262068965517243e-06,
      "loss": 0.308,
      "step": 910
    },
    {
      "epoch": 2.514442916093535,
      "grad_norm": 3.425462245941162,
      "learning_rate": 8.250574712643678e-06,
      "loss": 0.3026,
      "step": 915
    },
    {
      "epoch": 2.5281980742778543,
      "grad_norm": 2.760190725326538,
      "learning_rate": 8.239080459770116e-06,
      "loss": 0.3116,
      "step": 920
    },
    {
      "epoch": 2.5419532324621734,
      "grad_norm": 2.7421491146087646,
      "learning_rate": 8.227586206896553e-06,
      "loss": 0.3149,
      "step": 925
    },
    {
      "epoch": 2.5557083906464926,
      "grad_norm": 2.5775585174560547,
      "learning_rate": 8.21609195402299e-06,
      "loss": 0.3198,
      "step": 930
    },
    {
      "epoch": 2.5694635488308117,
      "grad_norm": 2.3301403522491455,
      "learning_rate": 8.204597701149426e-06,
      "loss": 0.2859,
      "step": 935
    },
    {
      "epoch": 2.5832187070151305,
      "grad_norm": 2.293553590774536,
      "learning_rate": 8.193103448275863e-06,
      "loss": 0.3045,
      "step": 940
    },
    {
      "epoch": 2.59697386519945,
      "grad_norm": 10.349946022033691,
      "learning_rate": 8.1816091954023e-06,
      "loss": 0.312,
      "step": 945
    },
    {
      "epoch": 2.6107290233837688,
      "grad_norm": 2.5628931522369385,
      "learning_rate": 8.170114942528736e-06,
      "loss": 0.3153,
      "step": 950
    },
    {
      "epoch": 2.624484181568088,
      "grad_norm": 3.248706340789795,
      "learning_rate": 8.158620689655174e-06,
      "loss": 0.3039,
      "step": 955
    },
    {
      "epoch": 2.638239339752407,
      "grad_norm": 16.6440486907959,
      "learning_rate": 8.147126436781609e-06,
      "loss": 0.3417,
      "step": 960
    },
    {
      "epoch": 2.6519944979367263,
      "grad_norm": 2.431967258453369,
      "learning_rate": 8.135632183908047e-06,
      "loss": 0.3354,
      "step": 965
    },
    {
      "epoch": 2.6657496561210454,
      "grad_norm": 2.516252279281616,
      "learning_rate": 8.124137931034484e-06,
      "loss": 0.2806,
      "step": 970
    },
    {
      "epoch": 2.6795048143053646,
      "grad_norm": 2.657532215118408,
      "learning_rate": 8.11264367816092e-06,
      "loss": 0.3043,
      "step": 975
    },
    {
      "epoch": 2.6932599724896837,
      "grad_norm": 2.3438785076141357,
      "learning_rate": 8.101149425287357e-06,
      "loss": 0.3201,
      "step": 980
    },
    {
      "epoch": 2.707015130674003,
      "grad_norm": 3.0601348876953125,
      "learning_rate": 8.089655172413794e-06,
      "loss": 0.3047,
      "step": 985
    },
    {
      "epoch": 2.720770288858322,
      "grad_norm": 2.5481855869293213,
      "learning_rate": 8.078160919540232e-06,
      "loss": 0.3035,
      "step": 990
    },
    {
      "epoch": 2.7345254470426408,
      "grad_norm": 2.3380320072174072,
      "learning_rate": 8.066666666666667e-06,
      "loss": 0.2845,
      "step": 995
    },
    {
      "epoch": 2.7482806052269604,
      "grad_norm": 2.7484054565429688,
      "learning_rate": 8.055172413793103e-06,
      "loss": 0.298,
      "step": 1000
    },
    {
      "epoch": 2.7482806052269604,
      "eval_bleu": 0.44979726292769356,
      "eval_cer": 13.05229283990346,
      "eval_loss": 0.7391562461853027,
      "eval_runtime": 676.7738,
      "eval_samples_per_second": 0.72,
      "eval_steps_per_second": 0.361,
      "eval_wer": 35.75192541548439,
      "step": 1000
    }
  ],
  "logging_steps": 5,
  "max_steps": 4500,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 13,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.35713091649536e+20,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}