{
  "best_metric": 27.064476999160213,
  "best_model_checkpoint": "./whisper-large-v2/second/checkpoint-3000",
  "epoch": 1.7164520743919884,
  "eval_steps": 500,
  "global_step": 3000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.002861230329041488,
      "grad_norm": 21.954660415649414,
      "learning_rate": 2.666666666666667e-07,
      "loss": 3.1748,
      "step": 5
    },
    {
      "epoch": 0.005722460658082976,
      "grad_norm": 21.253908157348633,
      "learning_rate": 5.333333333333335e-07,
      "loss": 3.0924,
      "step": 10
    },
    {
      "epoch": 0.008583690987124463,
      "grad_norm": 12.657620429992676,
      "learning_rate": 8.666666666666668e-07,
      "loss": 3.0297,
      "step": 15
    },
    {
      "epoch": 0.011444921316165951,
      "grad_norm": 9.762269973754883,
      "learning_rate": 1.2000000000000002e-06,
      "loss": 2.8025,
      "step": 20
    },
    {
      "epoch": 0.01430615164520744,
      "grad_norm": 9.680177688598633,
      "learning_rate": 1.5333333333333334e-06,
      "loss": 2.3626,
      "step": 25
    },
    {
      "epoch": 0.017167381974248927,
      "grad_norm": 15.449712753295898,
      "learning_rate": 1.8666666666666669e-06,
      "loss": 2.0648,
      "step": 30
    },
    {
      "epoch": 0.020028612303290415,
      "grad_norm": 5.520791530609131,
      "learning_rate": 2.2e-06,
      "loss": 1.8108,
      "step": 35
    },
    {
      "epoch": 0.022889842632331903,
      "grad_norm": 4.177562713623047,
      "learning_rate": 2.5333333333333338e-06,
      "loss": 1.7586,
      "step": 40
    },
    {
      "epoch": 0.02575107296137339,
      "grad_norm": 4.502848148345947,
      "learning_rate": 2.866666666666667e-06,
      "loss": 1.6625,
      "step": 45
    },
    {
      "epoch": 0.02861230329041488,
      "grad_norm": 4.344760417938232,
      "learning_rate": 3.2000000000000003e-06,
      "loss": 1.5478,
      "step": 50
    },
    {
      "epoch": 0.031473533619456366,
      "grad_norm": 4.028677940368652,
      "learning_rate": 3.5333333333333335e-06,
      "loss": 1.5475,
      "step": 55
    },
    {
      "epoch": 0.034334763948497854,
      "grad_norm": 4.139718532562256,
      "learning_rate": 3.866666666666667e-06,
      "loss": 1.3069,
      "step": 60
    },
    {
      "epoch": 0.03719599427753934,
      "grad_norm": 5.028609752655029,
      "learning_rate": 4.2000000000000004e-06,
      "loss": 1.2144,
      "step": 65
    },
    {
      "epoch": 0.04005722460658083,
      "grad_norm": 4.420126438140869,
      "learning_rate": 4.533333333333334e-06,
      "loss": 0.9845,
      "step": 70
    },
    {
      "epoch": 0.04291845493562232,
      "grad_norm": 4.999370574951172,
      "learning_rate": 4.7333333333333335e-06,
      "loss": 1.0552,
      "step": 75
    },
    {
      "epoch": 0.045779685264663805,
      "grad_norm": 3.8054537773132324,
      "learning_rate": 5.0666666666666676e-06,
      "loss": 0.9627,
      "step": 80
    },
    {
      "epoch": 0.04864091559370529,
      "grad_norm": 4.616196632385254,
      "learning_rate": 5.400000000000001e-06,
      "loss": 0.954,
      "step": 85
    },
    {
      "epoch": 0.05150214592274678,
      "grad_norm": 3.5308120250701904,
      "learning_rate": 5.733333333333334e-06,
      "loss": 0.8436,
      "step": 90
    },
    {
      "epoch": 0.05436337625178827,
      "grad_norm": 3.5140163898468018,
      "learning_rate": 6.066666666666667e-06,
      "loss": 0.8272,
      "step": 95
    },
    {
      "epoch": 0.05722460658082976,
      "grad_norm": 4.183732986450195,
      "learning_rate": 6.4000000000000006e-06,
      "loss": 0.8312,
      "step": 100
    },
    {
      "epoch": 0.060085836909871244,
      "grad_norm": 3.4372739791870117,
      "learning_rate": 6.733333333333334e-06,
      "loss": 0.8222,
      "step": 105
    },
    {
      "epoch": 0.06294706723891273,
      "grad_norm": 4.268640041351318,
      "learning_rate": 7.066666666666667e-06,
      "loss": 0.722,
      "step": 110
    },
    {
      "epoch": 0.06580829756795423,
      "grad_norm": 3.489219903945923,
      "learning_rate": 7.4e-06,
      "loss": 0.7458,
      "step": 115
    },
    {
      "epoch": 0.06866952789699571,
      "grad_norm": 3.189199447631836,
      "learning_rate": 7.733333333333334e-06,
      "loss": 0.7707,
      "step": 120
    },
    {
      "epoch": 0.0715307582260372,
      "grad_norm": 2.9029526710510254,
      "learning_rate": 8.066666666666667e-06,
      "loss": 0.7396,
      "step": 125
    },
    {
      "epoch": 0.07439198855507868,
      "grad_norm": 3.8515231609344482,
      "learning_rate": 8.400000000000001e-06,
      "loss": 0.7496,
      "step": 130
    },
    {
      "epoch": 0.07725321888412018,
      "grad_norm": 3.910144805908203,
      "learning_rate": 8.733333333333333e-06,
      "loss": 0.7224,
      "step": 135
    },
    {
      "epoch": 0.08011444921316166,
      "grad_norm": 3.474600076675415,
      "learning_rate": 9.066666666666667e-06,
      "loss": 0.7432,
      "step": 140
    },
    {
      "epoch": 0.08297567954220315,
      "grad_norm": 4.4429779052734375,
      "learning_rate": 9.4e-06,
      "loss": 0.7369,
      "step": 145
    },
    {
      "epoch": 0.08583690987124463,
      "grad_norm": 2.9862446784973145,
      "learning_rate": 9.733333333333334e-06,
      "loss": 0.764,
      "step": 150
    },
    {
      "epoch": 0.08869814020028613,
      "grad_norm": 2.7896616458892822,
      "learning_rate": 9.997701149425289e-06,
      "loss": 0.6994,
      "step": 155
    },
    {
      "epoch": 0.09155937052932761,
      "grad_norm": 3.3134992122650146,
      "learning_rate": 9.986206896551724e-06,
      "loss": 0.7356,
      "step": 160
    },
    {
      "epoch": 0.0944206008583691,
      "grad_norm": 3.047471046447754,
      "learning_rate": 9.974712643678162e-06,
      "loss": 0.7674,
      "step": 165
    },
    {
      "epoch": 0.09728183118741059,
      "grad_norm": 3.043182373046875,
      "learning_rate": 9.963218390804599e-06,
      "loss": 0.6429,
      "step": 170
    },
    {
      "epoch": 0.10014306151645208,
      "grad_norm": 3.04833722114563,
      "learning_rate": 9.951724137931035e-06,
      "loss": 0.6888,
      "step": 175
    },
    {
      "epoch": 0.10300429184549356,
      "grad_norm": 3.2061524391174316,
      "learning_rate": 9.940229885057472e-06,
      "loss": 0.6523,
      "step": 180
    },
    {
      "epoch": 0.10586552217453506,
      "grad_norm": 2.497347593307495,
      "learning_rate": 9.928735632183909e-06,
      "loss": 0.6607,
      "step": 185
    },
    {
      "epoch": 0.10872675250357654,
      "grad_norm": 3.134495973587036,
      "learning_rate": 9.917241379310347e-06,
      "loss": 0.6465,
      "step": 190
    },
    {
      "epoch": 0.11158798283261803,
      "grad_norm": 2.9670844078063965,
      "learning_rate": 9.905747126436782e-06,
      "loss": 0.7079,
      "step": 195
    },
    {
      "epoch": 0.11444921316165951,
      "grad_norm": 2.81243896484375,
      "learning_rate": 9.89425287356322e-06,
      "loss": 0.6617,
      "step": 200
    },
    {
      "epoch": 0.11731044349070101,
      "grad_norm": 3.66934871673584,
      "learning_rate": 9.882758620689657e-06,
      "loss": 0.696,
      "step": 205
    },
    {
      "epoch": 0.12017167381974249,
      "grad_norm": 3.769137144088745,
      "learning_rate": 9.871264367816093e-06,
      "loss": 0.6105,
      "step": 210
    },
    {
      "epoch": 0.12303290414878398,
      "grad_norm": 3.402257204055786,
      "learning_rate": 9.85977011494253e-06,
      "loss": 0.6433,
      "step": 215
    },
    {
      "epoch": 0.12589413447782546,
      "grad_norm": 2.8765928745269775,
      "learning_rate": 9.848275862068966e-06,
      "loss": 0.6646,
      "step": 220
    },
    {
      "epoch": 0.12875536480686695,
      "grad_norm": 2.7113330364227295,
      "learning_rate": 9.836781609195403e-06,
      "loss": 0.5717,
      "step": 225
    },
    {
      "epoch": 0.13161659513590845,
      "grad_norm": 3.1563119888305664,
      "learning_rate": 9.82528735632184e-06,
      "loss": 0.579,
      "step": 230
    },
    {
      "epoch": 0.13447782546494993,
      "grad_norm": 2.4642252922058105,
      "learning_rate": 9.813793103448276e-06,
      "loss": 0.6197,
      "step": 235
    },
    {
      "epoch": 0.13733905579399142,
      "grad_norm": 2.8252813816070557,
      "learning_rate": 9.802298850574713e-06,
      "loss": 0.5913,
      "step": 240
    },
    {
      "epoch": 0.1402002861230329,
      "grad_norm": 4.408820629119873,
      "learning_rate": 9.79080459770115e-06,
      "loss": 0.6163,
      "step": 245
    },
    {
      "epoch": 0.1430615164520744,
      "grad_norm": 2.8997461795806885,
      "learning_rate": 9.779310344827588e-06,
      "loss": 0.5658,
      "step": 250
    },
    {
      "epoch": 0.1459227467811159,
      "grad_norm": 2.968632459640503,
      "learning_rate": 9.767816091954022e-06,
      "loss": 0.6076,
      "step": 255
    },
    {
      "epoch": 0.14878397711015737,
      "grad_norm": 2.8237318992614746,
      "learning_rate": 9.75632183908046e-06,
      "loss": 0.5988,
      "step": 260
    },
    {
      "epoch": 0.15164520743919885,
      "grad_norm": 2.6587376594543457,
      "learning_rate": 9.744827586206897e-06,
      "loss": 0.6651,
      "step": 265
    },
    {
      "epoch": 0.15450643776824036,
      "grad_norm": 2.794654369354248,
      "learning_rate": 9.733333333333334e-06,
      "loss": 0.6383,
      "step": 270
    },
    {
      "epoch": 0.15736766809728184,
      "grad_norm": 2.70902419090271,
      "learning_rate": 9.72183908045977e-06,
      "loss": 0.6082,
      "step": 275
    },
    {
      "epoch": 0.16022889842632332,
      "grad_norm": 2.6028928756713867,
      "learning_rate": 9.710344827586207e-06,
      "loss": 0.6708,
      "step": 280
    },
    {
      "epoch": 0.1630901287553648,
      "grad_norm": 2.982057571411133,
      "learning_rate": 9.698850574712645e-06,
      "loss": 0.5788,
      "step": 285
    },
    {
      "epoch": 0.1659513590844063,
      "grad_norm": 2.9549922943115234,
      "learning_rate": 9.68735632183908e-06,
      "loss": 0.5164,
      "step": 290
    },
    {
      "epoch": 0.1688125894134478,
      "grad_norm": 5.054598808288574,
      "learning_rate": 9.675862068965518e-06,
      "loss": 0.5822,
      "step": 295
    },
    {
      "epoch": 0.17167381974248927,
      "grad_norm": 2.511012315750122,
      "learning_rate": 9.664367816091955e-06,
      "loss": 0.5995,
      "step": 300
    },
    {
      "epoch": 0.17453505007153075,
      "grad_norm": 3.233830451965332,
      "learning_rate": 9.652873563218392e-06,
      "loss": 0.5666,
      "step": 305
    },
    {
      "epoch": 0.17739628040057226,
      "grad_norm": 2.620699167251587,
      "learning_rate": 9.641379310344828e-06,
      "loss": 0.5093,
      "step": 310
    },
    {
      "epoch": 0.18025751072961374,
      "grad_norm": 5.6488728523254395,
      "learning_rate": 9.629885057471265e-06,
      "loss": 0.4977,
      "step": 315
    },
    {
      "epoch": 0.18311874105865522,
      "grad_norm": 2.7786080837249756,
      "learning_rate": 9.618390804597701e-06,
      "loss": 0.5563,
      "step": 320
    },
    {
      "epoch": 0.1859799713876967,
      "grad_norm": 2.7111008167266846,
      "learning_rate": 9.606896551724138e-06,
      "loss": 0.5267,
      "step": 325
    },
    {
      "epoch": 0.1888412017167382,
      "grad_norm": 2.415985584259033,
      "learning_rate": 9.595402298850576e-06,
      "loss": 0.5335,
      "step": 330
    },
    {
      "epoch": 0.1917024320457797,
      "grad_norm": 2.8600497245788574,
      "learning_rate": 9.583908045977011e-06,
      "loss": 0.5402,
      "step": 335
    },
    {
      "epoch": 0.19456366237482117,
      "grad_norm": 2.4760890007019043,
      "learning_rate": 9.57241379310345e-06,
      "loss": 0.5985,
      "step": 340
    },
    {
      "epoch": 0.19742489270386265,
      "grad_norm": 3.655806541442871,
      "learning_rate": 9.560919540229886e-06,
      "loss": 0.5025,
      "step": 345
    },
    {
      "epoch": 0.20028612303290416,
      "grad_norm": 2.7408816814422607,
      "learning_rate": 9.549425287356323e-06,
      "loss": 0.5415,
      "step": 350
    },
    {
      "epoch": 0.20314735336194564,
      "grad_norm": 9.877788543701172,
      "learning_rate": 9.537931034482759e-06,
      "loss": 0.4862,
      "step": 355
    },
    {
      "epoch": 0.20600858369098712,
      "grad_norm": 2.2370998859405518,
      "learning_rate": 9.526436781609196e-06,
      "loss": 0.5054,
      "step": 360
    },
    {
      "epoch": 0.2088698140200286,
      "grad_norm": 3.0609054565429688,
      "learning_rate": 9.514942528735634e-06,
      "loss": 0.5602,
      "step": 365
    },
    {
      "epoch": 0.2117310443490701,
      "grad_norm": 2.412261486053467,
      "learning_rate": 9.503448275862069e-06,
      "loss": 0.536,
      "step": 370
    },
    {
      "epoch": 0.2145922746781116,
      "grad_norm": 2.7460951805114746,
      "learning_rate": 9.491954022988507e-06,
      "loss": 0.525,
      "step": 375
    },
    {
      "epoch": 0.21745350500715308,
      "grad_norm": 2.7391223907470703,
      "learning_rate": 9.480459770114944e-06,
      "loss": 0.5462,
      "step": 380
    },
    {
      "epoch": 0.22031473533619456,
      "grad_norm": 2.774522066116333,
      "learning_rate": 9.46896551724138e-06,
      "loss": 0.5564,
      "step": 385
    },
    {
      "epoch": 0.22317596566523606,
      "grad_norm": 2.564851999282837,
      "learning_rate": 9.457471264367817e-06,
      "loss": 0.5193,
      "step": 390
    },
    {
      "epoch": 0.22603719599427755,
      "grad_norm": 2.2967424392700195,
      "learning_rate": 9.445977011494253e-06,
      "loss": 0.4689,
      "step": 395
    },
    {
      "epoch": 0.22889842632331903,
      "grad_norm": 2.590604782104492,
      "learning_rate": 9.43448275862069e-06,
      "loss": 0.5028,
      "step": 400
    },
    {
      "epoch": 0.2317596566523605,
      "grad_norm": 2.7005531787872314,
      "learning_rate": 9.422988505747127e-06,
      "loss": 0.5287,
      "step": 405
    },
    {
      "epoch": 0.23462088698140202,
      "grad_norm": 2.4030439853668213,
      "learning_rate": 9.411494252873565e-06,
      "loss": 0.5581,
      "step": 410
    },
    {
      "epoch": 0.2374821173104435,
      "grad_norm": 2.6434333324432373,
      "learning_rate": 9.4e-06,
      "loss": 0.4987,
      "step": 415
    },
    {
      "epoch": 0.24034334763948498,
      "grad_norm": 2.167104959487915,
      "learning_rate": 9.388505747126438e-06,
      "loss": 0.4725,
      "step": 420
    },
    {
      "epoch": 0.24320457796852646,
      "grad_norm": 2.4801626205444336,
      "learning_rate": 9.377011494252875e-06,
      "loss": 0.5301,
      "step": 425
    },
    {
      "epoch": 0.24606580829756797,
      "grad_norm": 2.600285291671753,
      "learning_rate": 9.365517241379311e-06,
      "loss": 0.5267,
      "step": 430
    },
    {
      "epoch": 0.24892703862660945,
      "grad_norm": 2.541861057281494,
      "learning_rate": 9.354022988505748e-06,
      "loss": 0.5006,
      "step": 435
    },
    {
      "epoch": 0.25178826895565093,
      "grad_norm": 2.667675018310547,
      "learning_rate": 9.342528735632184e-06,
      "loss": 0.4852,
      "step": 440
    },
    {
      "epoch": 0.25464949928469244,
      "grad_norm": 3.0567309856414795,
      "learning_rate": 9.331034482758623e-06,
      "loss": 0.4525,
      "step": 445
    },
    {
      "epoch": 0.2575107296137339,
      "grad_norm": 2.588550090789795,
      "learning_rate": 9.319540229885058e-06,
      "loss": 0.5356,
      "step": 450
    },
    {
      "epoch": 0.2603719599427754,
      "grad_norm": 2.520589828491211,
      "learning_rate": 9.308045977011496e-06,
      "loss": 0.5632,
      "step": 455
    },
    {
      "epoch": 0.2632331902718169,
      "grad_norm": 2.466790199279785,
      "learning_rate": 9.296551724137932e-06,
      "loss": 0.5452,
      "step": 460
    },
    {
      "epoch": 0.26609442060085836,
      "grad_norm": 2.561587333679199,
      "learning_rate": 9.285057471264369e-06,
      "loss": 0.5066,
      "step": 465
    },
    {
      "epoch": 0.26895565092989987,
      "grad_norm": 2.183382511138916,
      "learning_rate": 9.273563218390806e-06,
      "loss": 0.4582,
      "step": 470
    },
    {
      "epoch": 0.2718168812589413,
      "grad_norm": 3.0747621059417725,
      "learning_rate": 9.262068965517242e-06,
      "loss": 0.4871,
      "step": 475
    },
    {
      "epoch": 0.27467811158798283,
      "grad_norm": 3.8696835041046143,
      "learning_rate": 9.250574712643679e-06,
      "loss": 0.4493,
      "step": 480
    },
    {
      "epoch": 0.27753934191702434,
      "grad_norm": 2.3706977367401123,
      "learning_rate": 9.239080459770115e-06,
      "loss": 0.4401,
      "step": 485
    },
    {
      "epoch": 0.2804005722460658,
      "grad_norm": 4.646365642547607,
      "learning_rate": 9.227586206896552e-06,
      "loss": 0.4801,
      "step": 490
    },
    {
      "epoch": 0.2832618025751073,
      "grad_norm": 2.519423723220825,
      "learning_rate": 9.216091954022988e-06,
      "loss": 0.4543,
      "step": 495
    },
    {
      "epoch": 0.2861230329041488,
      "grad_norm": 2.541934013366699,
      "learning_rate": 9.204597701149425e-06,
      "loss": 0.5319,
      "step": 500
    },
    {
      "epoch": 0.2861230329041488,
      "eval_bleu": 0.480183300780465,
      "eval_cer": 23.119512122029427,
      "eval_loss": 0.7426198720932007,
      "eval_runtime": 501.4653,
      "eval_samples_per_second": 2.638,
      "eval_steps_per_second": 0.331,
      "eval_wer": 40.98161799010917,
      "step": 500
    },
    {
      "epoch": 0.28898426323319026,
      "grad_norm": 4.23681116104126,
      "learning_rate": 9.193103448275863e-06,
      "loss": 0.4907,
      "step": 505
    },
    {
      "epoch": 0.2918454935622318,
      "grad_norm": 2.405881404876709,
      "learning_rate": 9.1816091954023e-06,
      "loss": 0.4101,
      "step": 510
    },
    {
      "epoch": 0.2947067238912732,
      "grad_norm": 2.5162203311920166,
      "learning_rate": 9.170114942528736e-06,
      "loss": 0.4606,
      "step": 515
    },
    {
      "epoch": 0.29756795422031473,
      "grad_norm": 2.335982084274292,
      "learning_rate": 9.158620689655173e-06,
      "loss": 0.4662,
      "step": 520
    },
    {
      "epoch": 0.30042918454935624,
      "grad_norm": 2.8497443199157715,
      "learning_rate": 9.14712643678161e-06,
      "loss": 0.4826,
      "step": 525
    },
    {
      "epoch": 0.3032904148783977,
      "grad_norm": 2.6003220081329346,
      "learning_rate": 9.135632183908046e-06,
      "loss": 0.4986,
      "step": 530
    },
    {
      "epoch": 0.3061516452074392,
      "grad_norm": 2.5119142532348633,
      "learning_rate": 9.124137931034483e-06,
      "loss": 0.4771,
      "step": 535
    },
    {
      "epoch": 0.3090128755364807,
      "grad_norm": 8.402257919311523,
      "learning_rate": 9.112643678160921e-06,
      "loss": 0.4317,
      "step": 540
    },
    {
      "epoch": 0.31187410586552217,
      "grad_norm": 2.056044816970825,
      "learning_rate": 9.101149425287356e-06,
      "loss": 0.4711,
      "step": 545
    },
    {
      "epoch": 0.3147353361945637,
      "grad_norm": 2.1775403022766113,
      "learning_rate": 9.089655172413794e-06,
      "loss": 0.4298,
      "step": 550
    },
    {
      "epoch": 0.31759656652360513,
      "grad_norm": 2.420010805130005,
      "learning_rate": 9.07816091954023e-06,
      "loss": 0.4389,
      "step": 555
    },
    {
      "epoch": 0.32045779685264664,
      "grad_norm": 2.5747554302215576,
      "learning_rate": 9.066666666666667e-06,
      "loss": 0.4092,
      "step": 560
    },
    {
      "epoch": 0.32331902718168815,
      "grad_norm": 2.8916008472442627,
      "learning_rate": 9.055172413793104e-06,
      "loss": 0.5179,
      "step": 565
    },
    {
      "epoch": 0.3261802575107296,
      "grad_norm": 2.4451074600219727,
      "learning_rate": 9.04367816091954e-06,
      "loss": 0.4783,
      "step": 570
    },
    {
      "epoch": 0.3290414878397711,
      "grad_norm": 2.4466512203216553,
      "learning_rate": 9.032183908045977e-06,
      "loss": 0.4781,
      "step": 575
    },
    {
      "epoch": 0.3319027181688126,
      "grad_norm": 2.4510862827301025,
      "learning_rate": 9.020689655172414e-06,
      "loss": 0.475,
      "step": 580
    },
    {
      "epoch": 0.33476394849785407,
      "grad_norm": 2.7609736919403076,
      "learning_rate": 9.009195402298852e-06,
      "loss": 0.4245,
      "step": 585
    },
    {
      "epoch": 0.3376251788268956,
      "grad_norm": 2.5262796878814697,
      "learning_rate": 8.997701149425289e-06,
      "loss": 0.4323,
      "step": 590
    },
    {
      "epoch": 0.34048640915593703,
      "grad_norm": 2.2738754749298096,
      "learning_rate": 8.986206896551725e-06,
      "loss": 0.4612,
      "step": 595
    },
    {
      "epoch": 0.34334763948497854,
      "grad_norm": 2.770024538040161,
      "learning_rate": 8.974712643678162e-06,
      "loss": 0.4388,
      "step": 600
    },
    {
      "epoch": 0.34620886981402005,
      "grad_norm": 2.2855265140533447,
      "learning_rate": 8.963218390804598e-06,
      "loss": 0.4388,
      "step": 605
    },
    {
      "epoch": 0.3490701001430615,
      "grad_norm": 2.3497042655944824,
      "learning_rate": 8.951724137931035e-06,
      "loss": 0.4515,
      "step": 610
    },
    {
      "epoch": 0.351931330472103,
      "grad_norm": 2.3851680755615234,
      "learning_rate": 8.940229885057471e-06,
      "loss": 0.4598,
      "step": 615
    },
    {
      "epoch": 0.3547925608011445,
      "grad_norm": 2.254589319229126,
      "learning_rate": 8.92873563218391e-06,
      "loss": 0.4421,
      "step": 620
    },
    {
      "epoch": 0.35765379113018597,
      "grad_norm": 2.236403703689575,
      "learning_rate": 8.917241379310345e-06,
      "loss": 0.4465,
      "step": 625
    },
    {
      "epoch": 0.3605150214592275,
      "grad_norm": 2.3994998931884766,
      "learning_rate": 8.905747126436783e-06,
      "loss": 0.4199,
      "step": 630
    },
    {
      "epoch": 0.36337625178826893,
      "grad_norm": 2.434783458709717,
      "learning_rate": 8.89425287356322e-06,
      "loss": 0.4444,
      "step": 635
    },
    {
      "epoch": 0.36623748211731044,
      "grad_norm": 2.076225757598877,
      "learning_rate": 8.882758620689656e-06,
      "loss": 0.4563,
      "step": 640
    },
    {
      "epoch": 0.36909871244635195,
      "grad_norm": 2.2288596630096436,
      "learning_rate": 8.871264367816093e-06,
      "loss": 0.451,
      "step": 645
    },
    {
      "epoch": 0.3719599427753934,
      "grad_norm": 2.42144775390625,
      "learning_rate": 8.85977011494253e-06,
      "loss": 0.4545,
      "step": 650
    },
    {
      "epoch": 0.3748211731044349,
      "grad_norm": 2.624284267425537,
      "learning_rate": 8.848275862068966e-06,
      "loss": 0.4045,
      "step": 655
    },
    {
      "epoch": 0.3776824034334764,
      "grad_norm": 9.528400421142578,
      "learning_rate": 8.836781609195402e-06,
      "loss": 0.4278,
      "step": 660
    },
    {
      "epoch": 0.3805436337625179,
      "grad_norm": 2.683384656906128,
      "learning_rate": 8.82528735632184e-06,
      "loss": 0.4331,
      "step": 665
    },
    {
      "epoch": 0.3834048640915594,
      "grad_norm": 2.5828654766082764,
      "learning_rate": 8.813793103448277e-06,
      "loss": 0.4128,
      "step": 670
    },
    {
      "epoch": 0.38626609442060084,
      "grad_norm": 2.038062334060669,
      "learning_rate": 8.802298850574714e-06,
      "loss": 0.4491,
      "step": 675
    },
    {
      "epoch": 0.38912732474964234,
      "grad_norm": 2.5008745193481445,
      "learning_rate": 8.79080459770115e-06,
      "loss": 0.4472,
      "step": 680
    },
    {
      "epoch": 0.39198855507868385,
      "grad_norm": 1.8770477771759033,
      "learning_rate": 8.779310344827587e-06,
      "loss": 0.3688,
      "step": 685
    },
    {
      "epoch": 0.3948497854077253,
      "grad_norm": 6.977114200592041,
      "learning_rate": 8.767816091954024e-06,
      "loss": 0.4046,
      "step": 690
    },
    {
      "epoch": 0.3977110157367668,
      "grad_norm": 4.948948383331299,
      "learning_rate": 8.75632183908046e-06,
      "loss": 0.4068,
      "step": 695
    },
    {
      "epoch": 0.4005722460658083,
      "grad_norm": 2.363247871398926,
      "learning_rate": 8.744827586206898e-06,
      "loss": 0.4461,
      "step": 700
    },
    {
      "epoch": 0.4034334763948498,
      "grad_norm": 2.1947221755981445,
      "learning_rate": 8.733333333333333e-06,
      "loss": 0.4199,
      "step": 705
    },
    {
      "epoch": 0.4062947067238913,
      "grad_norm": 2.2790868282318115,
      "learning_rate": 8.721839080459772e-06,
      "loss": 0.4671,
      "step": 710
    },
    {
      "epoch": 0.40915593705293274,
      "grad_norm": 2.0782546997070312,
      "learning_rate": 8.710344827586208e-06,
      "loss": 0.4433,
      "step": 715
    },
    {
      "epoch": 0.41201716738197425,
      "grad_norm": 2.1484241485595703,
      "learning_rate": 8.698850574712645e-06,
      "loss": 0.4461,
      "step": 720
    },
    {
      "epoch": 0.41487839771101576,
      "grad_norm": 3.469759225845337,
      "learning_rate": 8.687356321839081e-06,
      "loss": 0.4165,
      "step": 725
    },
    {
      "epoch": 0.4177396280400572,
      "grad_norm": 2.101419687271118,
      "learning_rate": 8.675862068965518e-06,
      "loss": 0.4002,
      "step": 730
    },
    {
      "epoch": 0.4206008583690987,
      "grad_norm": 2.3812413215637207,
      "learning_rate": 8.664367816091954e-06,
      "loss": 0.4196,
      "step": 735
    },
    {
      "epoch": 0.4234620886981402,
      "grad_norm": 2.1074626445770264,
      "learning_rate": 8.652873563218391e-06,
      "loss": 0.4543,
      "step": 740
    },
    {
      "epoch": 0.4263233190271817,
      "grad_norm": 2.31530499458313,
      "learning_rate": 8.641379310344828e-06,
      "loss": 0.4452,
      "step": 745
    },
    {
      "epoch": 0.4291845493562232,
      "grad_norm": 2.038053035736084,
      "learning_rate": 8.629885057471266e-06,
      "loss": 0.3887,
      "step": 750
    },
    {
      "epoch": 0.43204577968526464,
      "grad_norm": 2.248845100402832,
      "learning_rate": 8.6183908045977e-06,
      "loss": 0.3911,
      "step": 755
    },
    {
      "epoch": 0.43490701001430615,
      "grad_norm": 2.2125308513641357,
      "learning_rate": 8.606896551724139e-06,
      "loss": 0.3985,
      "step": 760
    },
    {
      "epoch": 0.43776824034334766,
      "grad_norm": 2.6718173027038574,
      "learning_rate": 8.595402298850576e-06,
      "loss": 0.4349,
      "step": 765
    },
    {
      "epoch": 0.4406294706723891,
      "grad_norm": 2.338291645050049,
      "learning_rate": 8.583908045977012e-06,
      "loss": 0.4338,
      "step": 770
    },
    {
      "epoch": 0.4434907010014306,
      "grad_norm": 2.2783362865448,
      "learning_rate": 8.572413793103449e-06,
      "loss": 0.4223,
      "step": 775
    },
    {
      "epoch": 0.44635193133047213,
      "grad_norm": 5.572329521179199,
      "learning_rate": 8.560919540229885e-06,
      "loss": 0.373,
      "step": 780
    },
    {
      "epoch": 0.4492131616595136,
      "grad_norm": 2.154139518737793,
      "learning_rate": 8.549425287356322e-06,
      "loss": 0.3849,
      "step": 785
    },
    {
      "epoch": 0.4520743919885551,
      "grad_norm": 2.2320711612701416,
      "learning_rate": 8.537931034482759e-06,
      "loss": 0.398,
      "step": 790
    },
    {
      "epoch": 0.45493562231759654,
      "grad_norm": 1.9622141122817993,
      "learning_rate": 8.526436781609197e-06,
      "loss": 0.4432,
      "step": 795
    },
    {
      "epoch": 0.45779685264663805,
      "grad_norm": 2.268714189529419,
      "learning_rate": 8.514942528735632e-06,
      "loss": 0.4021,
      "step": 800
    },
    {
      "epoch": 0.46065808297567956,
      "grad_norm": 2.613569974899292,
      "learning_rate": 8.50344827586207e-06,
      "loss": 0.4017,
      "step": 805
    },
    {
      "epoch": 0.463519313304721,
      "grad_norm": 2.3678150177001953,
      "learning_rate": 8.491954022988507e-06,
      "loss": 0.3759,
      "step": 810
    },
    {
      "epoch": 0.4663805436337625,
      "grad_norm": 2.136413335800171,
      "learning_rate": 8.480459770114943e-06,
      "loss": 0.4278,
      "step": 815
    },
    {
      "epoch": 0.46924177396280403,
      "grad_norm": 2.0563809871673584,
      "learning_rate": 8.46896551724138e-06,
      "loss": 0.4112,
      "step": 820
    },
    {
      "epoch": 0.4721030042918455,
      "grad_norm": 3.3801417350769043,
      "learning_rate": 8.457471264367816e-06,
      "loss": 0.3994,
      "step": 825
    },
    {
      "epoch": 0.474964234620887,
      "grad_norm": 2.1917874813079834,
      "learning_rate": 8.445977011494255e-06,
      "loss": 0.3993,
      "step": 830
    },
    {
      "epoch": 0.47782546494992845,
      "grad_norm": 2.640014410018921,
      "learning_rate": 8.43448275862069e-06,
      "loss": 0.4242,
      "step": 835
    },
    {
      "epoch": 0.48068669527896996,
      "grad_norm": 2.17958664894104,
      "learning_rate": 8.422988505747128e-06,
      "loss": 0.3786,
      "step": 840
    },
    {
      "epoch": 0.48354792560801146,
      "grad_norm": 2.140859603881836,
      "learning_rate": 8.411494252873564e-06,
      "loss": 0.3993,
      "step": 845
    },
    {
      "epoch": 0.4864091559370529,
      "grad_norm": 2.2732794284820557,
      "learning_rate": 8.400000000000001e-06,
      "loss": 0.3761,
      "step": 850
    },
    {
      "epoch": 0.4892703862660944,
      "grad_norm": 2.2494969367980957,
      "learning_rate": 8.388505747126437e-06,
      "loss": 0.403,
      "step": 855
    },
    {
      "epoch": 0.49213161659513593,
      "grad_norm": 2.0700440406799316,
      "learning_rate": 8.377011494252874e-06,
      "loss": 0.4249,
      "step": 860
    },
    {
      "epoch": 0.4949928469241774,
      "grad_norm": 2.3763673305511475,
      "learning_rate": 8.36551724137931e-06,
      "loss": 0.3646,
      "step": 865
    },
    {
      "epoch": 0.4978540772532189,
      "grad_norm": 2.0693488121032715,
      "learning_rate": 8.354022988505747e-06,
      "loss": 0.3974,
      "step": 870
    },
    {
      "epoch": 0.5007153075822603,
      "grad_norm": 2.108673095703125,
      "learning_rate": 8.342528735632185e-06,
      "loss": 0.3871,
      "step": 875
    },
    {
      "epoch": 0.5035765379113019,
      "grad_norm": 4.603137493133545,
      "learning_rate": 8.33103448275862e-06,
      "loss": 0.392,
      "step": 880
    },
    {
      "epoch": 0.5064377682403434,
      "grad_norm": 2.015547275543213,
      "learning_rate": 8.319540229885059e-06,
      "loss": 0.4202,
      "step": 885
    },
    {
      "epoch": 0.5092989985693849,
      "grad_norm": 2.3268239498138428,
      "learning_rate": 8.308045977011495e-06,
      "loss": 0.3995,
      "step": 890
    },
    {
      "epoch": 0.5121602288984263,
      "grad_norm": 2.2089991569519043,
      "learning_rate": 8.296551724137932e-06,
      "loss": 0.4013,
      "step": 895
    },
    {
      "epoch": 0.5150214592274678,
      "grad_norm": 2.1193923950195312,
      "learning_rate": 8.285057471264368e-06,
      "loss": 0.3817,
      "step": 900
    },
    {
      "epoch": 0.5178826895565093,
      "grad_norm": 1.7973988056182861,
      "learning_rate": 8.273563218390805e-06,
      "loss": 0.3724,
      "step": 905
    },
    {
      "epoch": 0.5207439198855508,
      "grad_norm": 3.1105146408081055,
      "learning_rate": 8.262068965517243e-06,
      "loss": 0.3775,
      "step": 910
    },
    {
      "epoch": 0.5236051502145923,
      "grad_norm": 2.1949639320373535,
      "learning_rate": 8.250574712643678e-06,
      "loss": 0.4222,
      "step": 915
    },
    {
      "epoch": 0.5264663805436338,
      "grad_norm": 9.827264785766602,
      "learning_rate": 8.239080459770116e-06,
      "loss": 0.399,
      "step": 920
    },
    {
      "epoch": 0.5293276108726752,
      "grad_norm": 2.282693862915039,
      "learning_rate": 8.227586206896553e-06,
      "loss": 0.3708,
      "step": 925
    },
    {
      "epoch": 0.5321888412017167,
      "grad_norm": 2.296790599822998,
      "learning_rate": 8.21609195402299e-06,
      "loss": 0.4161,
      "step": 930
    },
    {
      "epoch": 0.5350500715307582,
      "grad_norm": 1.8870588541030884,
      "learning_rate": 8.204597701149426e-06,
      "loss": 0.3556,
      "step": 935
    },
    {
      "epoch": 0.5379113018597997,
      "grad_norm": 2.1730587482452393,
      "learning_rate": 8.193103448275863e-06,
      "loss": 0.3998,
      "step": 940
    },
    {
      "epoch": 0.5407725321888412,
      "grad_norm": 2.2204318046569824,
      "learning_rate": 8.1816091954023e-06,
      "loss": 0.3458,
      "step": 945
    },
    {
      "epoch": 0.5436337625178826,
      "grad_norm": 1.9323971271514893,
      "learning_rate": 8.170114942528736e-06,
      "loss": 0.359,
      "step": 950
    },
    {
      "epoch": 0.5464949928469242,
      "grad_norm": 13.680197715759277,
      "learning_rate": 8.158620689655174e-06,
      "loss": 0.4351,
      "step": 955
    },
    {
      "epoch": 0.5493562231759657,
      "grad_norm": 2.302557945251465,
      "learning_rate": 8.147126436781609e-06,
      "loss": 0.3962,
      "step": 960
    },
    {
      "epoch": 0.5522174535050072,
      "grad_norm": 2.17879056930542,
      "learning_rate": 8.135632183908047e-06,
      "loss": 0.4163,
      "step": 965
    },
    {
      "epoch": 0.5550786838340487,
      "grad_norm": 1.8171741962432861,
      "learning_rate": 8.124137931034484e-06,
      "loss": 0.4154,
      "step": 970
    },
    {
      "epoch": 0.5579399141630901,
      "grad_norm": 2.161654233932495,
      "learning_rate": 8.11264367816092e-06,
      "loss": 0.4062,
      "step": 975
    },
    {
      "epoch": 0.5608011444921316,
      "grad_norm": 2.201740264892578,
      "learning_rate": 8.101149425287357e-06,
      "loss": 0.3761,
      "step": 980
    },
    {
      "epoch": 0.5636623748211731,
      "grad_norm": 1.8697162866592407,
      "learning_rate": 8.089655172413794e-06,
      "loss": 0.3507,
      "step": 985
    },
    {
      "epoch": 0.5665236051502146,
      "grad_norm": 2.4330639839172363,
      "learning_rate": 8.078160919540232e-06,
      "loss": 0.3872,
      "step": 990
    },
    {
      "epoch": 0.5693848354792561,
      "grad_norm": 2.6203877925872803,
      "learning_rate": 8.066666666666667e-06,
      "loss": 0.4102,
      "step": 995
    },
    {
      "epoch": 0.5722460658082976,
      "grad_norm": 1.927872896194458,
      "learning_rate": 8.055172413793103e-06,
      "loss": 0.3354,
      "step": 1000
    },
    {
      "epoch": 0.5722460658082976,
      "eval_bleu": 0.5331656154016544,
      "eval_cer": 20.120471095927357,
      "eval_loss": 0.6430336833000183,
      "eval_runtime": 484.3877,
      "eval_samples_per_second": 2.731,
      "eval_steps_per_second": 0.343,
      "eval_wer": 36.26947839880564,
      "step": 1000
    },
    {
      "epoch": 0.575107296137339,
      "grad_norm": 2.234710216522217,
      "learning_rate": 8.043678160919542e-06,
      "loss": 0.3395,
      "step": 1005
    },
    {
      "epoch": 0.5779685264663805,
      "grad_norm": 1.911469578742981,
      "learning_rate": 8.032183908045977e-06,
      "loss": 0.3965,
      "step": 1010
    },
    {
      "epoch": 0.580829756795422,
      "grad_norm": 2.7243082523345947,
      "learning_rate": 8.020689655172415e-06,
      "loss": 0.381,
      "step": 1015
    },
    {
      "epoch": 0.5836909871244635,
      "grad_norm": 2.4552667140960693,
      "learning_rate": 8.009195402298851e-06,
      "loss": 0.357,
      "step": 1020
    },
    {
      "epoch": 0.586552217453505,
      "grad_norm": 2.055844306945801,
      "learning_rate": 7.997701149425288e-06,
      "loss": 0.3691,
      "step": 1025
    },
    {
      "epoch": 0.5894134477825465,
      "grad_norm": 2.588245391845703,
      "learning_rate": 7.986206896551725e-06,
      "loss": 0.3515,
      "step": 1030
    },
    {
      "epoch": 0.592274678111588,
      "grad_norm": 2.321615695953369,
      "learning_rate": 7.974712643678161e-06,
      "loss": 0.3567,
      "step": 1035
    },
    {
      "epoch": 0.5951359084406295,
      "grad_norm": 2.2714169025421143,
      "learning_rate": 7.963218390804598e-06,
      "loss": 0.3676,
      "step": 1040
    },
    {
      "epoch": 0.597997138769671,
      "grad_norm": 2.0706753730773926,
      "learning_rate": 7.951724137931034e-06,
      "loss": 0.39,
      "step": 1045
    },
    {
      "epoch": 0.6008583690987125,
      "grad_norm": 1.8779950141906738,
      "learning_rate": 7.940229885057473e-06,
      "loss": 0.3361,
      "step": 1050
    },
    {
      "epoch": 0.6037195994277539,
      "grad_norm": 3.2601895332336426,
      "learning_rate": 7.928735632183907e-06,
      "loss": 0.3611,
      "step": 1055
    },
    {
      "epoch": 0.6065808297567954,
      "grad_norm": 1.9203985929489136,
      "learning_rate": 7.917241379310346e-06,
      "loss": 0.3906,
      "step": 1060
    },
    {
      "epoch": 0.6094420600858369,
      "grad_norm": 2.388303518295288,
      "learning_rate": 7.905747126436782e-06,
      "loss": 0.3871,
      "step": 1065
    },
    {
      "epoch": 0.6123032904148784,
      "grad_norm": 1.9799396991729736,
      "learning_rate": 7.894252873563219e-06,
      "loss": 0.3654,
      "step": 1070
    },
    {
      "epoch": 0.6151645207439199,
      "grad_norm": 2.21216082572937,
      "learning_rate": 7.882758620689655e-06,
      "loss": 0.4078,
      "step": 1075
    },
    {
      "epoch": 0.6180257510729614,
      "grad_norm": 1.9444260597229004,
      "learning_rate": 7.871264367816092e-06,
      "loss": 0.3693,
      "step": 1080
    },
    {
      "epoch": 0.6208869814020028,
      "grad_norm": 1.9393128156661987,
      "learning_rate": 7.85977011494253e-06,
      "loss": 0.3599,
      "step": 1085
    },
    {
      "epoch": 0.6237482117310443,
      "grad_norm": 1.9323508739471436,
      "learning_rate": 7.848275862068965e-06,
      "loss": 0.3322,
      "step": 1090
    },
    {
      "epoch": 0.6266094420600858,
      "grad_norm": 2.9762513637542725,
      "learning_rate": 7.836781609195403e-06,
      "loss": 0.3957,
      "step": 1095
    },
    {
      "epoch": 0.6294706723891274,
      "grad_norm": 2.230358362197876,
      "learning_rate": 7.82528735632184e-06,
      "loss": 0.3563,
      "step": 1100
    },
    {
      "epoch": 0.6323319027181689,
      "grad_norm": 2.114469051361084,
      "learning_rate": 7.813793103448277e-06,
      "loss": 0.3902,
      "step": 1105
    },
    {
      "epoch": 0.6351931330472103,
      "grad_norm": 3.268624782562256,
      "learning_rate": 7.802298850574713e-06,
      "loss": 0.3582,
      "step": 1110
    },
    {
      "epoch": 0.6380543633762518,
      "grad_norm": 2.0319249629974365,
      "learning_rate": 7.79080459770115e-06,
      "loss": 0.3368,
      "step": 1115
    },
    {
      "epoch": 0.6409155937052933,
      "grad_norm": 2.1834099292755127,
      "learning_rate": 7.779310344827586e-06,
      "loss": 0.4234,
      "step": 1120
    },
    {
      "epoch": 0.6437768240343348,
      "grad_norm": 2.5935869216918945,
      "learning_rate": 7.767816091954023e-06,
      "loss": 0.356,
      "step": 1125
    },
    {
      "epoch": 0.6466380543633763,
      "grad_norm": 2.1476664543151855,
      "learning_rate": 7.756321839080461e-06,
      "loss": 0.3941,
      "step": 1130
    },
    {
      "epoch": 0.6494992846924177,
      "grad_norm": 2.005326986312866,
      "learning_rate": 7.744827586206896e-06,
      "loss": 0.3448,
      "step": 1135
    },
    {
      "epoch": 0.6523605150214592,
      "grad_norm": 2.055753469467163,
      "learning_rate": 7.733333333333334e-06,
      "loss": 0.3169,
      "step": 1140
    },
    {
      "epoch": 0.6552217453505007,
      "grad_norm": 2.293834924697876,
      "learning_rate": 7.721839080459771e-06,
      "loss": 0.3939,
      "step": 1145
    },
    {
      "epoch": 0.6580829756795422,
      "grad_norm": 2.178842782974243,
      "learning_rate": 7.710344827586208e-06,
      "loss": 0.3473,
      "step": 1150
    },
    {
      "epoch": 0.6609442060085837,
      "grad_norm": 2.022002935409546,
      "learning_rate": 7.698850574712644e-06,
      "loss": 0.3884,
      "step": 1155
    },
    {
      "epoch": 0.6638054363376252,
      "grad_norm": 2.30517840385437,
      "learning_rate": 7.68735632183908e-06,
      "loss": 0.3516,
      "step": 1160
    },
    {
      "epoch": 0.6666666666666666,
      "grad_norm": 2.340341806411743,
      "learning_rate": 7.675862068965519e-06,
      "loss": 0.3469,
      "step": 1165
    },
    {
      "epoch": 0.6695278969957081,
      "grad_norm": 1.8878148794174194,
      "learning_rate": 7.664367816091954e-06,
      "loss": 0.3484,
      "step": 1170
    },
    {
      "epoch": 0.6723891273247496,
      "grad_norm": 2.0944101810455322,
      "learning_rate": 7.652873563218392e-06,
      "loss": 0.3395,
      "step": 1175
    },
    {
      "epoch": 0.6752503576537912,
      "grad_norm": 3.42524790763855,
      "learning_rate": 7.641379310344829e-06,
      "loss": 0.2986,
      "step": 1180
    },
    {
      "epoch": 0.6781115879828327,
      "grad_norm": 2.1868836879730225,
      "learning_rate": 7.629885057471265e-06,
      "loss": 0.3645,
      "step": 1185
    },
    {
      "epoch": 0.6809728183118741,
      "grad_norm": 2.6077160835266113,
      "learning_rate": 7.618390804597702e-06,
      "loss": 0.3482,
      "step": 1190
    },
    {
      "epoch": 0.6838340486409156,
      "grad_norm": 2.132552146911621,
      "learning_rate": 7.6068965517241385e-06,
      "loss": 0.3358,
      "step": 1195
    },
    {
      "epoch": 0.6866952789699571,
      "grad_norm": 1.9296846389770508,
      "learning_rate": 7.595402298850575e-06,
      "loss": 0.4169,
      "step": 1200
    },
    {
      "epoch": 0.6895565092989986,
      "grad_norm": 1.8717005252838135,
      "learning_rate": 7.583908045977012e-06,
      "loss": 0.4115,
      "step": 1205
    },
    {
      "epoch": 0.6924177396280401,
      "grad_norm": 1.8066362142562866,
      "learning_rate": 7.572413793103449e-06,
      "loss": 0.3517,
      "step": 1210
    },
    {
      "epoch": 0.6952789699570815,
      "grad_norm": 2.3206546306610107,
      "learning_rate": 7.560919540229885e-06,
      "loss": 0.3548,
      "step": 1215
    },
    {
      "epoch": 0.698140200286123,
      "grad_norm": 3.7949299812316895,
      "learning_rate": 7.549425287356322e-06,
      "loss": 0.393,
      "step": 1220
    },
    {
      "epoch": 0.7010014306151645,
      "grad_norm": 2.03859281539917,
      "learning_rate": 7.53793103448276e-06,
      "loss": 0.4031,
      "step": 1225
    },
    {
      "epoch": 0.703862660944206,
      "grad_norm": 1.8809409141540527,
      "learning_rate": 7.526436781609196e-06,
      "loss": 0.3643,
      "step": 1230
    },
    {
      "epoch": 0.7067238912732475,
      "grad_norm": 2.139955759048462,
      "learning_rate": 7.514942528735633e-06,
      "loss": 0.3614,
      "step": 1235
    },
    {
      "epoch": 0.709585121602289,
      "grad_norm": 5.799084186553955,
      "learning_rate": 7.503448275862069e-06,
      "loss": 0.3257,
      "step": 1240
    },
    {
      "epoch": 0.7124463519313304,
      "grad_norm": 2.0690135955810547,
      "learning_rate": 7.491954022988507e-06,
      "loss": 0.3545,
      "step": 1245
    },
    {
      "epoch": 0.7153075822603719,
      "grad_norm": 1.9870096445083618,
      "learning_rate": 7.4804597701149425e-06,
      "loss": 0.3142,
      "step": 1250
    },
    {
      "epoch": 0.7181688125894135,
      "grad_norm": 51.02791976928711,
      "learning_rate": 7.46896551724138e-06,
      "loss": 0.3382,
      "step": 1255
    },
    {
      "epoch": 0.721030042918455,
      "grad_norm": 2.2177982330322266,
      "learning_rate": 7.457471264367817e-06,
      "loss": 0.3917,
      "step": 1260
    },
    {
      "epoch": 0.7238912732474965,
      "grad_norm": 2.1543567180633545,
      "learning_rate": 7.445977011494253e-06,
      "loss": 0.3522,
      "step": 1265
    },
    {
      "epoch": 0.7267525035765379,
      "grad_norm": 2.217419147491455,
      "learning_rate": 7.4344827586206906e-06,
      "loss": 0.3539,
      "step": 1270
    },
    {
      "epoch": 0.7296137339055794,
      "grad_norm": 2.348618984222412,
      "learning_rate": 7.422988505747127e-06,
      "loss": 0.336,
      "step": 1275
    },
    {
      "epoch": 0.7324749642346209,
      "grad_norm": 2.39912486076355,
      "learning_rate": 7.411494252873564e-06,
      "loss": 0.2934,
      "step": 1280
    },
    {
      "epoch": 0.7353361945636624,
      "grad_norm": 2.20755934715271,
      "learning_rate": 7.4e-06,
      "loss": 0.3641,
      "step": 1285
    },
    {
      "epoch": 0.7381974248927039,
      "grad_norm": 2.0606160163879395,
      "learning_rate": 7.388505747126438e-06,
      "loss": 0.3378,
      "step": 1290
    },
    {
      "epoch": 0.7410586552217453,
      "grad_norm": 2.7238168716430664,
      "learning_rate": 7.3770114942528735e-06,
      "loss": 0.3276,
      "step": 1295
    },
    {
      "epoch": 0.7439198855507868,
      "grad_norm": 1.9976911544799805,
      "learning_rate": 7.365517241379311e-06,
      "loss": 0.3222,
      "step": 1300
    },
    {
      "epoch": 0.7467811158798283,
      "grad_norm": 1.986830711364746,
      "learning_rate": 7.354022988505748e-06,
      "loss": 0.3396,
      "step": 1305
    },
    {
      "epoch": 0.7496423462088698,
      "grad_norm": 2.4798574447631836,
      "learning_rate": 7.342528735632185e-06,
      "loss": 0.371,
      "step": 1310
    },
    {
      "epoch": 0.7525035765379113,
      "grad_norm": 2.417635917663574,
      "learning_rate": 7.3310344827586215e-06,
      "loss": 0.306,
      "step": 1315
    },
    {
      "epoch": 0.7553648068669528,
      "grad_norm": 2.8017005920410156,
      "learning_rate": 7.319540229885058e-06,
      "loss": 0.3713,
      "step": 1320
    },
    {
      "epoch": 0.7582260371959942,
      "grad_norm": 2.679738759994507,
      "learning_rate": 7.3080459770114955e-06,
      "loss": 0.3295,
      "step": 1325
    },
    {
      "epoch": 0.7610872675250357,
      "grad_norm": 2.0234341621398926,
      "learning_rate": 7.296551724137931e-06,
      "loss": 0.3433,
      "step": 1330
    },
    {
      "epoch": 0.7639484978540773,
      "grad_norm": 2.098039150238037,
      "learning_rate": 7.285057471264369e-06,
      "loss": 0.2951,
      "step": 1335
    },
    {
      "epoch": 0.7668097281831188,
      "grad_norm": 2.076972723007202,
      "learning_rate": 7.273563218390805e-06,
      "loss": 0.3264,
      "step": 1340
    },
    {
      "epoch": 0.7696709585121603,
      "grad_norm": 1.5598034858703613,
      "learning_rate": 7.262068965517242e-06,
      "loss": 0.3109,
      "step": 1345
    },
    {
      "epoch": 0.7725321888412017,
      "grad_norm": 2.146202564239502,
      "learning_rate": 7.250574712643678e-06,
      "loss": 0.3736,
      "step": 1350
    },
    {
      "epoch": 0.7753934191702432,
      "grad_norm": 1.838539958000183,
      "learning_rate": 7.239080459770116e-06,
      "loss": 0.3024,
      "step": 1355
    },
    {
      "epoch": 0.7782546494992847,
      "grad_norm": 2.216581344604492,
      "learning_rate": 7.2275862068965515e-06,
      "loss": 0.3306,
      "step": 1360
    },
    {
      "epoch": 0.7811158798283262,
      "grad_norm": 2.171466112136841,
      "learning_rate": 7.216091954022989e-06,
      "loss": 0.303,
      "step": 1365
    },
    {
      "epoch": 0.7839771101573677,
      "grad_norm": 2.2364814281463623,
      "learning_rate": 7.204597701149426e-06,
      "loss": 0.321,
      "step": 1370
    },
    {
      "epoch": 0.7868383404864091,
      "grad_norm": 1.6637369394302368,
      "learning_rate": 7.193103448275862e-06,
      "loss": 0.3252,
      "step": 1375
    },
    {
      "epoch": 0.7896995708154506,
      "grad_norm": 1.8782521486282349,
      "learning_rate": 7.1816091954022996e-06,
      "loss": 0.3786,
      "step": 1380
    },
    {
      "epoch": 0.7925608011444921,
      "grad_norm": 2.16534423828125,
      "learning_rate": 7.170114942528736e-06,
      "loss": 0.3426,
      "step": 1385
    },
    {
      "epoch": 0.7954220314735336,
      "grad_norm": 2.1053144931793213,
      "learning_rate": 7.1586206896551736e-06,
      "loss": 0.3505,
      "step": 1390
    },
    {
      "epoch": 0.7982832618025751,
      "grad_norm": 6.17349100112915,
      "learning_rate": 7.147126436781609e-06,
      "loss": 0.2968,
      "step": 1395
    },
    {
      "epoch": 0.8011444921316166,
      "grad_norm": 2.3340353965759277,
      "learning_rate": 7.135632183908047e-06,
      "loss": 0.3762,
      "step": 1400
    },
    {
      "epoch": 0.804005722460658,
      "grad_norm": 2.3221354484558105,
      "learning_rate": 7.124137931034484e-06,
      "loss": 0.3955,
      "step": 1405
    },
    {
      "epoch": 0.8068669527896996,
      "grad_norm": 2.350771427154541,
      "learning_rate": 7.11264367816092e-06,
      "loss": 0.3191,
      "step": 1410
    },
    {
      "epoch": 0.8097281831187411,
      "grad_norm": 1.8787072896957397,
      "learning_rate": 7.101149425287357e-06,
      "loss": 0.3006,
      "step": 1415
    },
    {
      "epoch": 0.8125894134477826,
      "grad_norm": 1.9597926139831543,
      "learning_rate": 7.089655172413794e-06,
      "loss": 0.2888,
      "step": 1420
    },
    {
      "epoch": 0.8154506437768241,
      "grad_norm": 1.9055655002593994,
      "learning_rate": 7.0781609195402305e-06,
      "loss": 0.3297,
      "step": 1425
    },
    {
      "epoch": 0.8183118741058655,
      "grad_norm": 2.2036375999450684,
      "learning_rate": 7.066666666666667e-06,
      "loss": 0.3717,
      "step": 1430
    },
    {
      "epoch": 0.821173104434907,
      "grad_norm": 2.1537721157073975,
      "learning_rate": 7.0551724137931045e-06,
      "loss": 0.3656,
      "step": 1435
    },
    {
      "epoch": 0.8240343347639485,
      "grad_norm": 2.1095426082611084,
      "learning_rate": 7.04367816091954e-06,
      "loss": 0.3491,
      "step": 1440
    },
    {
      "epoch": 0.82689556509299,
      "grad_norm": 1.9522277116775513,
      "learning_rate": 7.032183908045978e-06,
      "loss": 0.3656,
      "step": 1445
    },
    {
      "epoch": 0.8297567954220315,
      "grad_norm": 2.347623109817505,
      "learning_rate": 7.020689655172414e-06,
      "loss": 0.3174,
      "step": 1450
    },
    {
      "epoch": 0.8326180257510729,
      "grad_norm": 2.204845428466797,
      "learning_rate": 7.009195402298851e-06,
      "loss": 0.3624,
      "step": 1455
    },
    {
      "epoch": 0.8354792560801144,
      "grad_norm": 2.0607683658599854,
      "learning_rate": 6.997701149425287e-06,
      "loss": 0.3131,
      "step": 1460
    },
    {
      "epoch": 0.8383404864091559,
      "grad_norm": 2.110132932662964,
      "learning_rate": 6.986206896551725e-06,
      "loss": 0.3632,
      "step": 1465
    },
    {
      "epoch": 0.8412017167381974,
      "grad_norm": 1.96920645236969,
      "learning_rate": 6.974712643678162e-06,
      "loss": 0.3401,
      "step": 1470
    },
    {
      "epoch": 0.844062947067239,
      "grad_norm": 3.4651010036468506,
      "learning_rate": 6.963218390804598e-06,
      "loss": 0.2866,
      "step": 1475
    },
    {
      "epoch": 0.8469241773962805,
      "grad_norm": 2.317070484161377,
      "learning_rate": 6.951724137931035e-06,
      "loss": 0.3004,
      "step": 1480
    },
    {
      "epoch": 0.8497854077253219,
      "grad_norm": 26.246292114257812,
      "learning_rate": 6.940229885057472e-06,
      "loss": 0.3586,
      "step": 1485
    },
    {
      "epoch": 0.8526466380543634,
      "grad_norm": 1.979914665222168,
      "learning_rate": 6.9287356321839086e-06,
      "loss": 0.3052,
      "step": 1490
    },
    {
      "epoch": 0.8555078683834049,
      "grad_norm": 2.253858804702759,
      "learning_rate": 6.917241379310345e-06,
      "loss": 0.3112,
      "step": 1495
    },
    {
      "epoch": 0.8583690987124464,
      "grad_norm": 2.219684362411499,
      "learning_rate": 6.9057471264367826e-06,
      "loss": 0.2922,
      "step": 1500
    },
    {
      "epoch": 0.8583690987124464,
      "eval_bleu": 0.5704059005886749,
      "eval_cer": 17.589679043423537,
      "eval_loss": 0.5921686291694641,
      "eval_runtime": 472.2636,
      "eval_samples_per_second": 2.801,
      "eval_steps_per_second": 0.351,
      "eval_wer": 31.27740972287021,
      "step": 1500
    },
    {
      "epoch": 0.8612303290414879,
      "grad_norm": 1.9186010360717773,
      "learning_rate": 6.894252873563218e-06,
      "loss": 0.2853,
      "step": 1505
    },
    {
      "epoch": 0.8640915593705293,
      "grad_norm": 2.261216402053833,
      "learning_rate": 6.882758620689656e-06,
      "loss": 0.3318,
      "step": 1510
    },
    {
      "epoch": 0.8669527896995708,
      "grad_norm": 3.1048824787139893,
      "learning_rate": 6.871264367816093e-06,
      "loss": 0.3328,
      "step": 1515
    },
    {
      "epoch": 0.8698140200286123,
      "grad_norm": 2.310605049133301,
      "learning_rate": 6.859770114942529e-06,
      "loss": 0.3264,
      "step": 1520
    },
    {
      "epoch": 0.8726752503576538,
      "grad_norm": 1.8471814393997192,
      "learning_rate": 6.848275862068966e-06,
      "loss": 0.2892,
      "step": 1525
    },
    {
      "epoch": 0.8755364806866953,
      "grad_norm": 2.1484620571136475,
      "learning_rate": 6.836781609195403e-06,
      "loss": 0.2602,
      "step": 1530
    },
    {
      "epoch": 0.8783977110157367,
      "grad_norm": 1.8682204484939575,
      "learning_rate": 6.8252873563218395e-06,
      "loss": 0.303,
      "step": 1535
    },
    {
      "epoch": 0.8812589413447782,
      "grad_norm": 3.347831964492798,
      "learning_rate": 6.813793103448276e-06,
      "loss": 0.2832,
      "step": 1540
    },
    {
      "epoch": 0.8841201716738197,
      "grad_norm": 1.732176661491394,
      "learning_rate": 6.8022988505747135e-06,
      "loss": 0.2986,
      "step": 1545
    },
    {
      "epoch": 0.8869814020028612,
      "grad_norm": 2.360858917236328,
      "learning_rate": 6.790804597701151e-06,
      "loss": 0.3109,
      "step": 1550
    },
    {
      "epoch": 0.8898426323319027,
      "grad_norm": 1.6071327924728394,
      "learning_rate": 6.779310344827587e-06,
      "loss": 0.3077,
      "step": 1555
    },
    {
      "epoch": 0.8927038626609443,
      "grad_norm": 1.632338047027588,
      "learning_rate": 6.767816091954024e-06,
      "loss": 0.3101,
      "step": 1560
    },
    {
      "epoch": 0.8955650929899857,
      "grad_norm": 2.3832032680511475,
      "learning_rate": 6.756321839080461e-06,
      "loss": 0.3123,
      "step": 1565
    },
    {
      "epoch": 0.8984263233190272,
      "grad_norm": 2.1065351963043213,
      "learning_rate": 6.744827586206897e-06,
      "loss": 0.3644,
      "step": 1570
    },
    {
      "epoch": 0.9012875536480687,
      "grad_norm": 2.0931053161621094,
      "learning_rate": 6.733333333333334e-06,
      "loss": 0.3104,
      "step": 1575
    },
    {
      "epoch": 0.9041487839771102,
      "grad_norm": 4.35342264175415,
      "learning_rate": 6.721839080459771e-06,
      "loss": 0.3217,
      "step": 1580
    },
    {
      "epoch": 0.9070100143061517,
      "grad_norm": 1.878527283668518,
      "learning_rate": 6.710344827586207e-06,
      "loss": 0.3229,
      "step": 1585
    },
    {
      "epoch": 0.9098712446351931,
      "grad_norm": 8.411993980407715,
      "learning_rate": 6.698850574712644e-06,
      "loss": 0.2923,
      "step": 1590
    },
    {
      "epoch": 0.9127324749642346,
      "grad_norm": 1.510301113128662,
      "learning_rate": 6.687356321839081e-06,
      "loss": 0.336,
      "step": 1595
    },
    {
      "epoch": 0.9155937052932761,
      "grad_norm": 2.0813026428222656,
      "learning_rate": 6.6758620689655176e-06,
      "loss": 0.3407,
      "step": 1600
    },
    {
      "epoch": 0.9184549356223176,
      "grad_norm": 2.687464952468872,
      "learning_rate": 6.664367816091954e-06,
      "loss": 0.332,
      "step": 1605
    },
    {
      "epoch": 0.9213161659513591,
      "grad_norm": 2.227113723754883,
      "learning_rate": 6.6528735632183916e-06,
      "loss": 0.3387,
      "step": 1610
    },
    {
      "epoch": 0.9241773962804005,
      "grad_norm": 2.1991989612579346,
      "learning_rate": 6.641379310344827e-06,
      "loss": 0.35,
      "step": 1615
    },
    {
      "epoch": 0.927038626609442,
      "grad_norm": 1.8104325532913208,
      "learning_rate": 6.629885057471265e-06,
      "loss": 0.298,
      "step": 1620
    },
    {
      "epoch": 0.9298998569384835,
      "grad_norm": 1.8303800821304321,
      "learning_rate": 6.618390804597702e-06,
      "loss": 0.3369,
      "step": 1625
    },
    {
      "epoch": 0.932761087267525,
      "grad_norm": 2.31982159614563,
      "learning_rate": 6.606896551724139e-06,
      "loss": 0.3661,
      "step": 1630
    },
    {
      "epoch": 0.9356223175965666,
      "grad_norm": 1.976562261581421,
      "learning_rate": 6.595402298850575e-06,
      "loss": 0.3207,
      "step": 1635
    },
    {
      "epoch": 0.9384835479256081,
      "grad_norm": 1.7467138767242432,
      "learning_rate": 6.583908045977012e-06,
      "loss": 0.3236,
      "step": 1640
    },
    {
      "epoch": 0.9413447782546495,
      "grad_norm": 1.8210949897766113,
      "learning_rate": 6.572413793103449e-06,
      "loss": 0.3603,
      "step": 1645
    },
    {
      "epoch": 0.944206008583691,
      "grad_norm": 1.9193000793457031,
      "learning_rate": 6.560919540229885e-06,
      "loss": 0.2887,
      "step": 1650
    },
    {
      "epoch": 0.9470672389127325,
      "grad_norm": 2.1192359924316406,
      "learning_rate": 6.5494252873563225e-06,
      "loss": 0.2915,
      "step": 1655
    },
    {
      "epoch": 0.949928469241774,
      "grad_norm": 1.9616992473602295,
      "learning_rate": 6.53793103448276e-06,
      "loss": 0.3274,
      "step": 1660
    },
    {
      "epoch": 0.9527896995708155,
      "grad_norm": 2.124767780303955,
      "learning_rate": 6.526436781609196e-06,
      "loss": 0.2469,
      "step": 1665
    },
    {
      "epoch": 0.9556509298998569,
      "grad_norm": 3.6808688640594482,
      "learning_rate": 6.514942528735633e-06,
      "loss": 0.3146,
      "step": 1670
    },
    {
      "epoch": 0.9585121602288984,
      "grad_norm": 1.803134560585022,
      "learning_rate": 6.50344827586207e-06,
      "loss": 0.3169,
      "step": 1675
    },
    {
      "epoch": 0.9613733905579399,
      "grad_norm": 2.043998956680298,
      "learning_rate": 6.491954022988506e-06,
      "loss": 0.2779,
      "step": 1680
    },
    {
      "epoch": 0.9642346208869814,
      "grad_norm": 2.008289098739624,
      "learning_rate": 6.480459770114943e-06,
      "loss": 0.305,
      "step": 1685
    },
    {
      "epoch": 0.9670958512160229,
      "grad_norm": 2.0637102127075195,
      "learning_rate": 6.46896551724138e-06,
      "loss": 0.3521,
      "step": 1690
    },
    {
      "epoch": 0.9699570815450643,
      "grad_norm": 2.0706608295440674,
      "learning_rate": 6.457471264367816e-06,
      "loss": 0.3179,
      "step": 1695
    },
    {
      "epoch": 0.9728183118741058,
      "grad_norm": 2.867837905883789,
      "learning_rate": 6.445977011494253e-06,
      "loss": 0.3405,
      "step": 1700
    },
    {
      "epoch": 0.9756795422031473,
      "grad_norm": 2.0019872188568115,
      "learning_rate": 6.43448275862069e-06,
      "loss": 0.3149,
      "step": 1705
    },
    {
      "epoch": 0.9785407725321889,
      "grad_norm": 2.19998836517334,
      "learning_rate": 6.422988505747127e-06,
      "loss": 0.2983,
      "step": 1710
    },
    {
      "epoch": 0.9814020028612304,
      "grad_norm": 2.0203206539154053,
      "learning_rate": 6.411494252873563e-06,
      "loss": 0.3399,
      "step": 1715
    },
    {
      "epoch": 0.9842632331902719,
      "grad_norm": 1.9604451656341553,
      "learning_rate": 6.4000000000000006e-06,
      "loss": 0.2917,
      "step": 1720
    },
    {
      "epoch": 0.9871244635193133,
      "grad_norm": 2.5613560676574707,
      "learning_rate": 6.388505747126438e-06,
      "loss": 0.3283,
      "step": 1725
    },
    {
      "epoch": 0.9899856938483548,
      "grad_norm": 2.0251967906951904,
      "learning_rate": 6.377011494252874e-06,
      "loss": 0.3311,
      "step": 1730
    },
    {
      "epoch": 0.9928469241773963,
      "grad_norm": 2.370732545852661,
      "learning_rate": 6.365517241379311e-06,
      "loss": 0.3488,
      "step": 1735
    },
    {
      "epoch": 0.9957081545064378,
      "grad_norm": 2.151982545852661,
      "learning_rate": 6.354022988505748e-06,
      "loss": 0.322,
      "step": 1740
    },
    {
      "epoch": 0.9985693848354793,
      "grad_norm": 5.340651988983154,
      "learning_rate": 6.342528735632184e-06,
      "loss": 0.3054,
      "step": 1745
    },
    {
      "epoch": 1.0011444921316166,
      "grad_norm": 2.277435302734375,
      "learning_rate": 6.331034482758621e-06,
      "loss": 0.2416,
      "step": 1750
    },
    {
      "epoch": 1.004005722460658,
      "grad_norm": 1.7808513641357422,
      "learning_rate": 6.319540229885058e-06,
      "loss": 0.2649,
      "step": 1755
    },
    {
      "epoch": 1.0068669527896996,
      "grad_norm": 1.7711721658706665,
      "learning_rate": 6.308045977011494e-06,
      "loss": 0.2364,
      "step": 1760
    },
    {
      "epoch": 1.009728183118741,
      "grad_norm": 1.7829340696334839,
      "learning_rate": 6.2965517241379315e-06,
      "loss": 0.2855,
      "step": 1765
    },
    {
      "epoch": 1.0125894134477826,
      "grad_norm": 1.5565894842147827,
      "learning_rate": 6.285057471264369e-06,
      "loss": 0.2562,
      "step": 1770
    },
    {
      "epoch": 1.015450643776824,
      "grad_norm": 1.6067863702774048,
      "learning_rate": 6.273563218390805e-06,
      "loss": 0.2253,
      "step": 1775
    },
    {
      "epoch": 1.0183118741058654,
      "grad_norm": 2.1315195560455322,
      "learning_rate": 6.262068965517242e-06,
      "loss": 0.2312,
      "step": 1780
    },
    {
      "epoch": 1.021173104434907,
      "grad_norm": 1.8657792806625366,
      "learning_rate": 6.250574712643679e-06,
      "loss": 0.247,
      "step": 1785
    },
    {
      "epoch": 1.0240343347639485,
      "grad_norm": 3.063485860824585,
      "learning_rate": 6.239080459770116e-06,
      "loss": 0.2215,
      "step": 1790
    },
    {
      "epoch": 1.02689556509299,
      "grad_norm": 1.955806016921997,
      "learning_rate": 6.227586206896552e-06,
      "loss": 0.2666,
      "step": 1795
    },
    {
      "epoch": 1.0297567954220315,
      "grad_norm": 1.8338463306427002,
      "learning_rate": 6.216091954022989e-06,
      "loss": 0.2201,
      "step": 1800
    },
    {
      "epoch": 1.0326180257510729,
      "grad_norm": 1.8974248170852661,
      "learning_rate": 6.204597701149427e-06,
      "loss": 0.2229,
      "step": 1805
    },
    {
      "epoch": 1.0354792560801145,
      "grad_norm": 2.2553300857543945,
      "learning_rate": 6.193103448275862e-06,
      "loss": 0.2259,
      "step": 1810
    },
    {
      "epoch": 1.0383404864091559,
      "grad_norm": 2.0794475078582764,
      "learning_rate": 6.1816091954023e-06,
      "loss": 0.2728,
      "step": 1815
    },
    {
      "epoch": 1.0412017167381975,
      "grad_norm": 2.247518301010132,
      "learning_rate": 6.170114942528736e-06,
|
"loss": 0.2729, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 1.044062947067239, |
|
"grad_norm": 1.6274186372756958, |
|
"learning_rate": 6.158620689655173e-06, |
|
"loss": 0.2038, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 1.0469241773962803, |
|
"grad_norm": 2.1243278980255127, |
|
"learning_rate": 6.1471264367816096e-06, |
|
"loss": 0.274, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 1.049785407725322, |
|
"grad_norm": 1.9605655670166016, |
|
"learning_rate": 6.135632183908047e-06, |
|
"loss": 0.2541, |
|
"step": 1835 |
|
}, |
|
{ |
|
"epoch": 1.0526466380543633, |
|
"grad_norm": 1.710223913192749, |
|
"learning_rate": 6.124137931034483e-06, |
|
"loss": 0.21, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 1.055507868383405, |
|
"grad_norm": 1.8919130563735962, |
|
"learning_rate": 6.11264367816092e-06, |
|
"loss": 0.2507, |
|
"step": 1845 |
|
}, |
|
{ |
|
"epoch": 1.0583690987124463, |
|
"grad_norm": 1.8515825271606445, |
|
"learning_rate": 6.101149425287357e-06, |
|
"loss": 0.2385, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.061230329041488, |
|
"grad_norm": 1.883626103401184, |
|
"learning_rate": 6.089655172413793e-06, |
|
"loss": 0.2451, |
|
"step": 1855 |
|
}, |
|
{ |
|
"epoch": 1.0640915593705293, |
|
"grad_norm": 1.564466953277588, |
|
"learning_rate": 6.07816091954023e-06, |
|
"loss": 0.2471, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.0669527896995707, |
|
"grad_norm": 1.9687753915786743, |
|
"learning_rate": 6.066666666666667e-06, |
|
"loss": 0.2778, |
|
"step": 1865 |
|
}, |
|
{ |
|
"epoch": 1.0698140200286124, |
|
"grad_norm": 2.0385849475860596, |
|
"learning_rate": 6.055172413793105e-06, |
|
"loss": 0.2365, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 1.0726752503576538, |
|
"grad_norm": 2.087181568145752, |
|
"learning_rate": 6.0436781609195405e-06, |
|
"loss": 0.2367, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 1.0755364806866954, |
|
"grad_norm": 1.9057412147521973, |
|
"learning_rate": 6.032183908045978e-06, |
|
"loss": 0.2579, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 1.0783977110157368, |
|
"grad_norm": 1.5374144315719604, |
|
"learning_rate": 6.0206896551724145e-06, |
|
"loss": 0.2296, |
|
"step": 1885 |
|
}, |
|
{ |
|
"epoch": 1.0812589413447782, |
|
"grad_norm": 1.5945782661437988, |
|
"learning_rate": 6.009195402298851e-06, |
|
"loss": 0.2564, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 1.0841201716738198, |
|
"grad_norm": 2.265857219696045, |
|
"learning_rate": 5.997701149425288e-06, |
|
"loss": 0.2523, |
|
"step": 1895 |
|
}, |
|
{ |
|
"epoch": 1.0869814020028612, |
|
"grad_norm": 1.6930688619613647, |
|
"learning_rate": 5.986206896551725e-06, |
|
"loss": 0.2054, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.0898426323319028, |
|
"grad_norm": 2.441913604736328, |
|
"learning_rate": 5.974712643678161e-06, |
|
"loss": 0.2328, |
|
"step": 1905 |
|
}, |
|
{ |
|
"epoch": 1.0927038626609442, |
|
"grad_norm": 1.6199525594711304, |
|
"learning_rate": 5.963218390804598e-06, |
|
"loss": 0.2068, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.0955650929899856, |
|
"grad_norm": 1.6580902338027954, |
|
"learning_rate": 5.951724137931036e-06, |
|
"loss": 0.2232, |
|
"step": 1915 |
|
}, |
|
{ |
|
"epoch": 1.0984263233190272, |
|
"grad_norm": 1.8525216579437256, |
|
"learning_rate": 5.940229885057471e-06, |
|
"loss": 0.2504, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.1012875536480686, |
|
"grad_norm": 15.594454765319824, |
|
"learning_rate": 5.928735632183909e-06, |
|
"loss": 0.2506, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 1.1041487839771102, |
|
"grad_norm": 1.6737009286880493, |
|
"learning_rate": 5.917241379310345e-06, |
|
"loss": 0.2715, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 1.1070100143061516, |
|
"grad_norm": 1.895065426826477, |
|
"learning_rate": 5.905747126436782e-06, |
|
"loss": 0.2166, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 1.109871244635193, |
|
"grad_norm": 1.8196299076080322, |
|
"learning_rate": 5.8942528735632186e-06, |
|
"loss": 0.2583, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 1.1127324749642347, |
|
"grad_norm": 2.042278289794922, |
|
"learning_rate": 5.882758620689656e-06, |
|
"loss": 0.2847, |
|
"step": 1945 |
|
}, |
|
{ |
|
"epoch": 1.115593705293276, |
|
"grad_norm": 2.381350517272949, |
|
"learning_rate": 5.871264367816092e-06, |
|
"loss": 0.2275, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.1184549356223177, |
|
"grad_norm": 1.5981552600860596, |
|
"learning_rate": 5.859770114942529e-06, |
|
"loss": 0.2241, |
|
"step": 1955 |
|
}, |
|
{ |
|
"epoch": 1.121316165951359, |
|
"grad_norm": 1.9198013544082642, |
|
"learning_rate": 5.848275862068966e-06, |
|
"loss": 0.2101, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 1.1241773962804005, |
|
"grad_norm": 11.413874626159668, |
|
"learning_rate": 5.836781609195403e-06, |
|
"loss": 0.3174, |
|
"step": 1965 |
|
}, |
|
{ |
|
"epoch": 1.127038626609442, |
|
"grad_norm": 2.361424446105957, |
|
"learning_rate": 5.825287356321839e-06, |
|
"loss": 0.2478, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 1.1298998569384835, |
|
"grad_norm": 2.057995080947876, |
|
"learning_rate": 5.813793103448276e-06, |
|
"loss": 0.2354, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 1.1327610872675251, |
|
"grad_norm": 1.7415863275527954, |
|
"learning_rate": 5.802298850574714e-06, |
|
"loss": 0.2048, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 1.1356223175965665, |
|
"grad_norm": 1.927140235900879, |
|
"learning_rate": 5.7908045977011495e-06, |
|
"loss": 0.2467, |
|
"step": 1985 |
|
}, |
|
{ |
|
"epoch": 1.138483547925608, |
|
"grad_norm": 1.8381935358047485, |
|
"learning_rate": 5.779310344827587e-06, |
|
"loss": 0.2603, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 1.1413447782546495, |
|
"grad_norm": 2.509054660797119, |
|
"learning_rate": 5.7678160919540235e-06, |
|
"loss": 0.2359, |
|
"step": 1995 |
|
}, |
|
{ |
|
"epoch": 1.144206008583691, |
|
"grad_norm": 2.898055076599121, |
|
"learning_rate": 5.75632183908046e-06, |
|
"loss": 0.2451, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.144206008583691, |
|
"eval_bleu": 0.5963992770681681, |
|
"eval_cer": 15.110581677604962, |
|
"eval_loss": 0.5831637382507324, |
|
"eval_runtime": 451.8627, |
|
"eval_samples_per_second": 2.928, |
|
"eval_steps_per_second": 0.367, |
|
"eval_wer": 28.156200429224597, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.1470672389127325, |
|
"grad_norm": 3.072053909301758, |
|
"learning_rate": 5.744827586206897e-06, |
|
"loss": 0.2566, |
|
"step": 2005 |
|
}, |
|
{ |
|
"epoch": 1.149928469241774, |
|
"grad_norm": 1.6033082008361816, |
|
"learning_rate": 5.733333333333334e-06, |
|
"loss": 0.2582, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 1.1527896995708153, |
|
"grad_norm": 13.557411193847656, |
|
"learning_rate": 5.72183908045977e-06, |
|
"loss": 0.2502, |
|
"step": 2015 |
|
}, |
|
{ |
|
"epoch": 1.155650929899857, |
|
"grad_norm": 1.9101053476333618, |
|
"learning_rate": 5.710344827586207e-06, |
|
"loss": 0.2492, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 1.1585121602288984, |
|
"grad_norm": 1.8975669145584106, |
|
"learning_rate": 5.698850574712645e-06, |
|
"loss": 0.2205, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 1.16137339055794, |
|
"grad_norm": 7.335193157196045, |
|
"learning_rate": 5.68735632183908e-06, |
|
"loss": 0.2087, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 1.1642346208869814, |
|
"grad_norm": 2.010817050933838, |
|
"learning_rate": 5.675862068965518e-06, |
|
"loss": 0.2717, |
|
"step": 2035 |
|
}, |
|
{ |
|
"epoch": 1.1670958512160228, |
|
"grad_norm": 1.9336493015289307, |
|
"learning_rate": 5.664367816091954e-06, |
|
"loss": 0.202, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 1.1699570815450644, |
|
"grad_norm": 1.725829839706421, |
|
"learning_rate": 5.652873563218392e-06, |
|
"loss": 0.2232, |
|
"step": 2045 |
|
}, |
|
{ |
|
"epoch": 1.1728183118741058, |
|
"grad_norm": 1.7198960781097412, |
|
"learning_rate": 5.6413793103448275e-06, |
|
"loss": 0.2283, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.1756795422031474, |
|
"grad_norm": 2.4473893642425537, |
|
"learning_rate": 5.629885057471265e-06, |
|
"loss": 0.2511, |
|
"step": 2055 |
|
}, |
|
{ |
|
"epoch": 1.1785407725321888, |
|
"grad_norm": 1.8747438192367554, |
|
"learning_rate": 5.618390804597702e-06, |
|
"loss": 0.2217, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 1.1814020028612302, |
|
"grad_norm": 1.8291605710983276, |
|
"learning_rate": 5.606896551724138e-06, |
|
"loss": 0.2128, |
|
"step": 2065 |
|
}, |
|
{ |
|
"epoch": 1.1842632331902718, |
|
"grad_norm": 1.9345383644104004, |
|
"learning_rate": 5.5954022988505756e-06, |
|
"loss": 0.2282, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 1.1871244635193132, |
|
"grad_norm": 1.9912608861923218, |
|
"learning_rate": 5.583908045977012e-06, |
|
"loss": 0.2305, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 1.1899856938483548, |
|
"grad_norm": 1.7613855600357056, |
|
"learning_rate": 5.572413793103449e-06, |
|
"loss": 0.2152, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 1.1928469241773962, |
|
"grad_norm": 1.9164098501205444, |
|
"learning_rate": 5.560919540229885e-06, |
|
"loss": 0.2385, |
|
"step": 2085 |
|
}, |
|
{ |
|
"epoch": 1.1957081545064379, |
|
"grad_norm": 1.8758496046066284, |
|
"learning_rate": 5.549425287356323e-06, |
|
"loss": 0.2197, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 1.1985693848354793, |
|
"grad_norm": 1.8170535564422607, |
|
"learning_rate": 5.5379310344827585e-06, |
|
"loss": 0.2273, |
|
"step": 2095 |
|
}, |
|
{ |
|
"epoch": 1.2014306151645207, |
|
"grad_norm": 4.734244346618652, |
|
"learning_rate": 5.526436781609196e-06, |
|
"loss": 0.2379, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.2042918454935623, |
|
"grad_norm": 1.7855401039123535, |
|
"learning_rate": 5.5149425287356325e-06, |
|
"loss": 0.2144, |
|
"step": 2105 |
|
}, |
|
{ |
|
"epoch": 1.2071530758226037, |
|
"grad_norm": 1.7985947132110596, |
|
"learning_rate": 5.503448275862069e-06, |
|
"loss": 0.2322, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 1.2100143061516453, |
|
"grad_norm": 2.8456921577453613, |
|
"learning_rate": 5.491954022988506e-06, |
|
"loss": 0.2486, |
|
"step": 2115 |
|
}, |
|
{ |
|
"epoch": 1.2128755364806867, |
|
"grad_norm": 1.7646148204803467, |
|
"learning_rate": 5.480459770114943e-06, |
|
"loss": 0.2119, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 1.215736766809728, |
|
"grad_norm": 1.7737618684768677, |
|
"learning_rate": 5.4689655172413805e-06, |
|
"loss": 0.2084, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 1.2185979971387697, |
|
"grad_norm": 2.054755926132202, |
|
"learning_rate": 5.457471264367816e-06, |
|
"loss": 0.2324, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 1.221459227467811, |
|
"grad_norm": 2.3212740421295166, |
|
"learning_rate": 5.445977011494254e-06, |
|
"loss": 0.2808, |
|
"step": 2135 |
|
}, |
|
{ |
|
"epoch": 1.2243204577968527, |
|
"grad_norm": 2.056720495223999, |
|
"learning_rate": 5.43448275862069e-06, |
|
"loss": 0.2789, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 1.2271816881258941, |
|
"grad_norm": 2.018855333328247, |
|
"learning_rate": 5.422988505747127e-06, |
|
"loss": 0.2229, |
|
"step": 2145 |
|
}, |
|
{ |
|
"epoch": 1.2300429184549357, |
|
"grad_norm": 1.7383620738983154, |
|
"learning_rate": 5.411494252873563e-06, |
|
"loss": 0.2371, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.2329041487839771, |
|
"grad_norm": 6.553476333618164, |
|
"learning_rate": 5.400000000000001e-06, |
|
"loss": 0.264, |
|
"step": 2155 |
|
}, |
|
{ |
|
"epoch": 1.2357653791130185, |
|
"grad_norm": 6.118597984313965, |
|
"learning_rate": 5.3885057471264365e-06, |
|
"loss": 0.2769, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 1.2386266094420602, |
|
"grad_norm": 1.9519344568252563, |
|
"learning_rate": 5.377011494252874e-06, |
|
"loss": 0.2316, |
|
"step": 2165 |
|
}, |
|
{ |
|
"epoch": 1.2414878397711016, |
|
"grad_norm": 2.161031723022461, |
|
"learning_rate": 5.365517241379311e-06, |
|
"loss": 0.2351, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 1.2443490701001432, |
|
"grad_norm": 1.8503310680389404, |
|
"learning_rate": 5.354022988505747e-06, |
|
"loss": 0.2303, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 1.2472103004291846, |
|
"grad_norm": 2.138127088546753, |
|
"learning_rate": 5.3425287356321846e-06, |
|
"loss": 0.2353, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 1.250071530758226, |
|
"grad_norm": 1.747727632522583, |
|
"learning_rate": 5.331034482758621e-06, |
|
"loss": 0.25, |
|
"step": 2185 |
|
}, |
|
{ |
|
"epoch": 1.2529327610872676, |
|
"grad_norm": 2.1394238471984863, |
|
"learning_rate": 5.319540229885058e-06, |
|
"loss": 0.2618, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 1.255793991416309, |
|
"grad_norm": 2.055069923400879, |
|
"learning_rate": 5.308045977011494e-06, |
|
"loss": 0.28, |
|
"step": 2195 |
|
}, |
|
{ |
|
"epoch": 1.2586552217453506, |
|
"grad_norm": 2.0998175144195557, |
|
"learning_rate": 5.296551724137932e-06, |
|
"loss": 0.2549, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.261516452074392, |
|
"grad_norm": 3.816950798034668, |
|
"learning_rate": 5.285057471264369e-06, |
|
"loss": 0.2376, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 1.2643776824034334, |
|
"grad_norm": 2.042818307876587, |
|
"learning_rate": 5.273563218390805e-06, |
|
"loss": 0.2322, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 1.267238912732475, |
|
"grad_norm": 1.8918198347091675, |
|
"learning_rate": 5.2620689655172415e-06, |
|
"loss": 0.2178, |
|
"step": 2215 |
|
}, |
|
{ |
|
"epoch": 1.2701001430615164, |
|
"grad_norm": 1.6626828908920288, |
|
"learning_rate": 5.250574712643679e-06, |
|
"loss": 0.1998, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 1.272961373390558, |
|
"grad_norm": 1.6923973560333252, |
|
"learning_rate": 5.239080459770115e-06, |
|
"loss": 0.2257, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 1.2758226037195994, |
|
"grad_norm": 1.5752869844436646, |
|
"learning_rate": 5.227586206896552e-06, |
|
"loss": 0.1856, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 1.2786838340486408, |
|
"grad_norm": 2.132702350616455, |
|
"learning_rate": 5.2160919540229895e-06, |
|
"loss": 0.2622, |
|
"step": 2235 |
|
}, |
|
{ |
|
"epoch": 1.2815450643776825, |
|
"grad_norm": 1.4981218576431274, |
|
"learning_rate": 5.204597701149425e-06, |
|
"loss": 0.2241, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 1.2844062947067239, |
|
"grad_norm": 2.2513628005981445, |
|
"learning_rate": 5.193103448275863e-06, |
|
"loss": 0.2307, |
|
"step": 2245 |
|
}, |
|
{ |
|
"epoch": 1.2872675250357655, |
|
"grad_norm": 1.824265718460083, |
|
"learning_rate": 5.181609195402299e-06, |
|
"loss": 0.2252, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.2901287553648069, |
|
"grad_norm": 1.9756693840026855, |
|
"learning_rate": 5.170114942528736e-06, |
|
"loss": 0.2284, |
|
"step": 2255 |
|
}, |
|
{ |
|
"epoch": 1.2929899856938483, |
|
"grad_norm": 1.7013108730316162, |
|
"learning_rate": 5.158620689655172e-06, |
|
"loss": 0.2399, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 1.2958512160228899, |
|
"grad_norm": 1.9803868532180786, |
|
"learning_rate": 5.14712643678161e-06, |
|
"loss": 0.2376, |
|
"step": 2265 |
|
}, |
|
{ |
|
"epoch": 1.2987124463519313, |
|
"grad_norm": 1.7983365058898926, |
|
"learning_rate": 5.1356321839080455e-06, |
|
"loss": 0.2461, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 1.301573676680973, |
|
"grad_norm": 1.6878979206085205, |
|
"learning_rate": 5.124137931034483e-06, |
|
"loss": 0.2415, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 1.3044349070100143, |
|
"grad_norm": 1.693176507949829, |
|
"learning_rate": 5.11264367816092e-06, |
|
"loss": 0.2007, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 1.3072961373390557, |
|
"grad_norm": 2.423611640930176, |
|
"learning_rate": 5.101149425287357e-06, |
|
"loss": 0.2614, |
|
"step": 2285 |
|
}, |
|
{ |
|
"epoch": 1.3101573676680973, |
|
"grad_norm": 1.6612671613693237, |
|
"learning_rate": 5.0896551724137936e-06, |
|
"loss": 0.206, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 1.3130185979971387, |
|
"grad_norm": 1.9878997802734375, |
|
"learning_rate": 5.07816091954023e-06, |
|
"loss": 0.214, |
|
"step": 2295 |
|
}, |
|
{ |
|
"epoch": 1.3158798283261803, |
|
"grad_norm": 2.2919623851776123, |
|
"learning_rate": 5.0666666666666676e-06, |
|
"loss": 0.212, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.3187410586552217, |
|
"grad_norm": 1.8216506242752075, |
|
"learning_rate": 5.055172413793103e-06, |
|
"loss": 0.2002, |
|
"step": 2305 |
|
}, |
|
{ |
|
"epoch": 1.3216022889842631, |
|
"grad_norm": 2.561490058898926, |
|
"learning_rate": 5.043678160919541e-06, |
|
"loss": 0.2258, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 1.3244635193133047, |
|
"grad_norm": 1.7542258501052856, |
|
"learning_rate": 5.032183908045978e-06, |
|
"loss": 0.2214, |
|
"step": 2315 |
|
}, |
|
{ |
|
"epoch": 1.3273247496423461, |
|
"grad_norm": 13.024736404418945, |
|
"learning_rate": 5.020689655172414e-06, |
|
"loss": 0.2181, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 1.3301859799713878, |
|
"grad_norm": 1.8888355493545532, |
|
"learning_rate": 5.009195402298851e-06, |
|
"loss": 0.213, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 1.3330472103004292, |
|
"grad_norm": 1.8508597612380981, |
|
"learning_rate": 4.997701149425288e-06, |
|
"loss": 0.2143, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 1.3359084406294706, |
|
"grad_norm": 2.070064067840576, |
|
"learning_rate": 4.9862068965517245e-06, |
|
"loss": 0.2511, |
|
"step": 2335 |
|
}, |
|
{ |
|
"epoch": 1.3387696709585122, |
|
"grad_norm": 2.605440378189087, |
|
"learning_rate": 4.974712643678161e-06, |
|
"loss": 0.2259, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 1.3416309012875536, |
|
"grad_norm": 1.8583369255065918, |
|
"learning_rate": 4.9632183908045985e-06, |
|
"loss": 0.2394, |
|
"step": 2345 |
|
}, |
|
{ |
|
"epoch": 1.3444921316165952, |
|
"grad_norm": 2.5312728881835938, |
|
"learning_rate": 4.951724137931035e-06, |
|
"loss": 0.2218, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.3473533619456366, |
|
"grad_norm": 2.2665977478027344, |
|
"learning_rate": 4.940229885057472e-06, |
|
"loss": 0.2255, |
|
"step": 2355 |
|
}, |
|
{ |
|
"epoch": 1.350214592274678, |
|
"grad_norm": 1.9536534547805786, |
|
"learning_rate": 4.928735632183908e-06, |
|
"loss": 0.2055, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.3530758226037196, |
|
"grad_norm": 2.3409595489501953, |
|
"learning_rate": 4.917241379310345e-06, |
|
"loss": 0.234, |
|
"step": 2365 |
|
}, |
|
{ |
|
"epoch": 1.355937052932761, |
|
"grad_norm": 1.9503809213638306, |
|
"learning_rate": 4.905747126436781e-06, |
|
"loss": 0.221, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 1.3587982832618026, |
|
"grad_norm": 1.5935649871826172, |
|
"learning_rate": 4.894252873563219e-06, |
|
"loss": 0.2168, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 1.361659513590844, |
|
"grad_norm": 1.8054594993591309, |
|
"learning_rate": 4.882758620689655e-06, |
|
"loss": 0.2136, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 1.3645207439198854, |
|
"grad_norm": 2.6980879306793213, |
|
"learning_rate": 4.871264367816093e-06, |
|
"loss": 0.2095, |
|
"step": 2385 |
|
}, |
|
{ |
|
"epoch": 1.367381974248927, |
|
"grad_norm": 1.9824919700622559, |
|
"learning_rate": 4.859770114942529e-06, |
|
"loss": 0.2201, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 1.3702432045779684, |
|
"grad_norm": 1.7837703227996826, |
|
"learning_rate": 4.848275862068966e-06, |
|
"loss": 0.2373, |
|
"step": 2395 |
|
}, |
|
{ |
|
"epoch": 1.37310443490701, |
|
"grad_norm": 3.7595748901367188, |
|
"learning_rate": 4.8367816091954026e-06, |
|
"loss": 0.206, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.3759656652360515, |
|
"grad_norm": 2.127598762512207, |
|
"learning_rate": 4.825287356321839e-06, |
|
"loss": 0.1929, |
|
"step": 2405 |
|
}, |
|
{ |
|
"epoch": 1.3788268955650929, |
|
"grad_norm": 2.089553117752075, |
|
"learning_rate": 4.813793103448276e-06, |
|
"loss": 0.2033, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 1.3816881258941345, |
|
"grad_norm": 1.8560612201690674, |
|
"learning_rate": 4.802298850574713e-06, |
|
"loss": 0.1716, |
|
"step": 2415 |
|
}, |
|
{ |
|
"epoch": 1.384549356223176, |
|
"grad_norm": 1.9387139081954956, |
|
"learning_rate": 4.79080459770115e-06, |
|
"loss": 0.1935, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 1.3874105865522175, |
|
"grad_norm": 1.860589623451233, |
|
"learning_rate": 4.779310344827587e-06, |
|
"loss": 0.2101, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 1.390271816881259, |
|
"grad_norm": 1.9807707071304321, |
|
"learning_rate": 4.767816091954024e-06, |
|
"loss": 0.2143, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 1.3931330472103003, |
|
"grad_norm": 1.8010809421539307, |
|
"learning_rate": 4.75632183908046e-06, |
|
"loss": 0.1802, |
|
"step": 2435 |
|
}, |
|
{ |
|
"epoch": 1.395994277539342, |
|
"grad_norm": 2.161853790283203, |
|
"learning_rate": 4.744827586206897e-06, |
|
"loss": 0.1936, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 1.3988555078683835, |
|
"grad_norm": 2.5427753925323486, |
|
"learning_rate": 4.7333333333333335e-06, |
|
"loss": 0.2381, |
|
"step": 2445 |
|
}, |
|
{ |
|
"epoch": 1.401716738197425, |
|
"grad_norm": 1.8699232339859009, |
|
"learning_rate": 4.72183908045977e-06, |
|
"loss": 0.1974, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.4045779685264663, |
|
"grad_norm": 2.080120086669922, |
|
"learning_rate": 4.7103448275862075e-06, |
|
"loss": 0.2287, |
|
"step": 2455 |
|
}, |
|
{ |
|
"epoch": 1.407439198855508, |
|
"grad_norm": 4.648638725280762, |
|
"learning_rate": 4.698850574712644e-06, |
|
"loss": 0.1988, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 1.4103004291845493, |
|
"grad_norm": 1.942683458328247, |
|
"learning_rate": 4.6873563218390815e-06, |
|
"loss": 0.2285, |
|
"step": 2465 |
|
}, |
|
{ |
|
"epoch": 1.413161659513591, |
|
"grad_norm": 1.915105938911438, |
|
"learning_rate": 4.675862068965517e-06, |
|
"loss": 0.2113, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 1.4160228898426324, |
|
"grad_norm": 1.8002665042877197, |
|
"learning_rate": 4.664367816091954e-06, |
|
"loss": 0.2011, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 1.4188841201716738, |
|
"grad_norm": 1.9071236848831177, |
|
"learning_rate": 4.652873563218391e-06, |
|
"loss": 0.1968, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 1.4217453505007154, |
|
"grad_norm": 1.786940097808838, |
|
"learning_rate": 4.641379310344828e-06, |
|
"loss": 0.2006, |
|
"step": 2485 |
|
}, |
|
{ |
|
"epoch": 1.4246065808297568, |
|
"grad_norm": 2.5510807037353516, |
|
"learning_rate": 4.629885057471264e-06, |
|
"loss": 0.2088, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 1.4274678111587984, |
|
"grad_norm": 1.7373242378234863, |
|
"learning_rate": 4.618390804597702e-06, |
|
"loss": 0.2007, |
|
"step": 2495 |
|
}, |
|
{ |
|
"epoch": 1.4303290414878398, |
|
"grad_norm": 1.6989120244979858, |
|
"learning_rate": 4.606896551724138e-06, |
|
"loss": 0.2401, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.4303290414878398, |
|
"eval_bleu": 0.5879103462796057, |
|
"eval_cer": 16.617219575054694, |
|
"eval_loss": 0.5721695423126221, |
|
"eval_runtime": 484.1918, |
|
"eval_samples_per_second": 2.732, |
|
"eval_steps_per_second": 0.343, |
|
"eval_wer": 29.817112998040496, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.4331902718168812, |
|
"grad_norm": 1.815650463104248, |
|
"learning_rate": 4.595402298850575e-06, |
|
"loss": 0.1995, |
|
"step": 2505 |
|
}, |
|
{ |
|
"epoch": 1.4360515021459228, |
|
"grad_norm": 1.6936380863189697, |
|
"learning_rate": 4.5839080459770116e-06, |
|
"loss": 0.2073, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 1.4389127324749642, |
|
"grad_norm": 2.0940747261047363, |
|
"learning_rate": 4.572413793103448e-06, |
|
"loss": 0.2513, |
|
"step": 2515 |
|
}, |
|
{ |
|
"epoch": 1.4417739628040058, |
|
"grad_norm": 1.7273896932601929, |
|
"learning_rate": 4.5609195402298856e-06, |
|
"loss": 0.2056, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 1.4446351931330472, |
|
"grad_norm": 1.7717649936676025, |
|
"learning_rate": 4.549425287356322e-06, |
|
"loss": 0.2089, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 1.4474964234620886, |
|
"grad_norm": 2.0581321716308594, |
|
"learning_rate": 4.537931034482759e-06, |
|
"loss": 0.2207, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 1.4503576537911302, |
|
"grad_norm": 2.054208517074585, |
|
"learning_rate": 4.526436781609196e-06, |
|
"loss": 0.2087, |
|
"step": 2535 |
|
}, |
|
{ |
|
"epoch": 1.4532188841201716, |
|
"grad_norm": 2.047858953475952, |
|
"learning_rate": 4.514942528735633e-06, |
|
"loss": 0.2405, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 1.4560801144492133, |
|
"grad_norm": 1.9581420421600342, |
|
"learning_rate": 4.503448275862069e-06, |
|
"loss": 0.1779, |
|
"step": 2545 |
|
}, |
|
{ |
|
"epoch": 1.4589413447782547, |
|
"grad_norm": 2.9552524089813232, |
|
"learning_rate": 4.491954022988506e-06, |
|
"loss": 0.2099, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.461802575107296, |
|
"grad_norm": 18.079795837402344, |
|
"learning_rate": 4.4804597701149425e-06, |
|
"loss": 0.2165, |
|
"step": 2555 |
|
}, |
|
{ |
|
"epoch": 1.4646638054363377, |
|
"grad_norm": 1.990879774093628, |
|
"learning_rate": 4.46896551724138e-06, |
|
"loss": 0.2078, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 1.467525035765379, |
|
"grad_norm": 1.542114019393921, |
|
"learning_rate": 4.4574712643678165e-06, |
|
"loss": 0.1974, |
|
"step": 2565 |
|
}, |
|
{ |
|
"epoch": 1.4703862660944207, |
|
"grad_norm": 1.6736189126968384, |
|
"learning_rate": 4.445977011494253e-06, |
|
"loss": 0.1994, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 1.473247496423462, |
|
"grad_norm": 1.9050010442733765, |
|
"learning_rate": 4.4344827586206905e-06, |
|
"loss": 0.1968, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 1.4761087267525035, |
|
"grad_norm": 2.1282589435577393, |
|
"learning_rate": 4.422988505747127e-06, |
|
"loss": 0.2174, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 1.478969957081545, |
|
"grad_norm": 2.1793580055236816, |
|
"learning_rate": 4.411494252873564e-06, |
|
"loss": 0.2112, |
|
"step": 2585 |
|
}, |
|
{ |
|
"epoch": 1.4818311874105865, |
|
"grad_norm": 1.7508175373077393, |
|
"learning_rate": 4.4e-06, |
|
"loss": 0.2173, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 1.4846924177396281, |
|
"grad_norm": 1.8711589574813843, |
|
"learning_rate": 4.388505747126437e-06, |
|
"loss": 0.1944, |
|
"step": 2595 |
|
}, |
|
{ |
|
"epoch": 1.4875536480686695, |
|
"grad_norm": 1.7508400678634644, |
|
"learning_rate": 4.377011494252874e-06, |
|
"loss": 0.2124, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.490414878397711, |
|
"grad_norm": 1.943958044052124, |
|
"learning_rate": 4.365517241379311e-06, |
|
"loss": 0.1899, |
|
"step": 2605 |
|
}, |
|
{ |
|
"epoch": 1.4932761087267525, |
|
"grad_norm": 1.783746361732483, |
|
"learning_rate": 4.354022988505747e-06, |
|
"loss": 0.2267, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 1.496137339055794, |
|
"grad_norm": 2.324143171310425, |
|
"learning_rate": 4.342528735632184e-06, |
|
"loss": 0.2066, |
|
"step": 2615 |
|
}, |
|
{ |
|
"epoch": 1.4989985693848356, |
|
"grad_norm": 6.195375442504883, |
|
"learning_rate": 4.3310344827586206e-06, |
|
"loss": 0.2285, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 1.501859799713877, |
|
"grad_norm": 1.9747071266174316, |
|
"learning_rate": 4.319540229885058e-06, |
|
"loss": 0.2448, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 1.5047210300429184, |
|
"grad_norm": 2.495802640914917, |
|
"learning_rate": 4.3080459770114946e-06, |
|
"loss": 0.1911, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 1.50758226037196, |
|
"grad_norm": 2.073364019393921, |
|
"learning_rate": 4.296551724137931e-06, |
|
"loss": 0.2217, |
|
"step": 2635 |
|
}, |
|
{ |
|
"epoch": 1.5104434907010016, |
|
"grad_norm": 1.9022804498672485, |
|
"learning_rate": 4.2850574712643686e-06, |
|
"loss": 0.198, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 1.513304721030043, |
|
"grad_norm": 1.475517988204956, |
|
"learning_rate": 4.273563218390805e-06, |
|
"loss": 0.2037, |
|
"step": 2645 |
|
}, |
|
{ |
|
"epoch": 1.5161659513590844, |
|
"grad_norm": 1.7946468591690063, |
|
"learning_rate": 4.262068965517242e-06, |
|
"loss": 0.199, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.5190271816881258, |
|
"grad_norm": 13.506173133850098, |
|
"learning_rate": 4.250574712643678e-06, |
|
"loss": 0.236, |
|
"step": 2655 |
|
}, |
|
{ |
|
"epoch": 1.5218884120171674, |
|
"grad_norm": 2.0159194469451904, |
|
"learning_rate": 4.239080459770115e-06, |
|
"loss": 0.2094, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 1.524749642346209, |
|
"grad_norm": 1.893707036972046, |
|
"learning_rate": 4.227586206896552e-06, |
|
"loss": 0.2002, |
|
"step": 2665 |
|
}, |
|
{ |
|
"epoch": 1.5276108726752504, |
|
"grad_norm": 1.8341419696807861, |
|
"learning_rate": 4.216091954022989e-06, |
|
"loss": 0.2072, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 1.5304721030042918, |
|
"grad_norm": 1.940000057220459, |
|
"learning_rate": 4.2045977011494255e-06, |
|
"loss": 0.2211, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 1.5333333333333332, |
|
"grad_norm": 2.36838698387146, |
|
"learning_rate": 4.193103448275863e-06, |
|
"loss": 0.2139, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 1.5361945636623748, |
|
"grad_norm": 1.875691294670105, |
|
"learning_rate": 4.1816091954022995e-06, |
|
"loss": 0.1754, |
|
"step": 2685 |
|
}, |
|
{ |
|
"epoch": 1.5390557939914165, |
|
"grad_norm": 1.5981868505477905, |
|
"learning_rate": 4.170114942528736e-06, |
|
"loss": 0.1998, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 1.5419170243204579, |
|
"grad_norm": 1.8597708940505981, |
|
"learning_rate": 4.158620689655173e-06, |
|
"loss": 0.2081, |
|
"step": 2695 |
|
}, |
|
{ |
|
"epoch": 1.5447782546494992, |
|
"grad_norm": 2.2451958656311035, |
|
"learning_rate": 4.147126436781609e-06, |
|
"loss": 0.2084, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.5476394849785406, |
|
"grad_norm": 1.8167076110839844, |
|
"learning_rate": 4.135632183908047e-06, |
|
"loss": 0.193, |
|
"step": 2705 |
|
}, |
|
{ |
|
"epoch": 1.5505007153075823, |
|
"grad_norm": 2.029426097869873, |
|
"learning_rate": 4.124137931034483e-06, |
|
"loss": 0.2223, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 1.5533619456366239, |
|
"grad_norm": 1.903867483139038, |
|
"learning_rate": 4.11264367816092e-06, |
|
"loss": 0.2066, |
|
"step": 2715 |
|
}, |
|
{ |
|
"epoch": 1.5562231759656653, |
|
"grad_norm": 2.1051104068756104, |
|
"learning_rate": 4.101149425287357e-06, |
|
"loss": 0.2131, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 1.5590844062947067, |
|
"grad_norm": 1.787436842918396, |
|
"learning_rate": 4.089655172413794e-06, |
|
"loss": 0.2099, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 1.561945636623748, |
|
"grad_norm": 1.9237557649612427, |
|
"learning_rate": 4.0781609195402295e-06, |
|
"loss": 0.1873, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 1.5648068669527897, |
|
"grad_norm": 1.709716558456421, |
|
"learning_rate": 4.066666666666667e-06, |
|
"loss": 0.1762, |
|
"step": 2735 |
|
}, |
|
{ |
|
"epoch": 1.5676680972818313, |
|
"grad_norm": 2.1726202964782715, |
|
"learning_rate": 4.0551724137931036e-06, |
|
"loss": 0.1982, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 1.5705293276108727, |
|
"grad_norm": 1.8287854194641113, |
|
"learning_rate": 4.043678160919541e-06, |
|
"loss": 0.168, |
|
"step": 2745 |
|
}, |
|
{ |
|
"epoch": 1.5733905579399141, |
|
"grad_norm": 2.111295223236084, |
|
"learning_rate": 4.0321839080459776e-06, |
|
"loss": 0.1783, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.5762517882689555, |
|
"grad_norm": 3.8943326473236084, |
|
"learning_rate": 4.020689655172414e-06, |
|
"loss": 0.2125, |
|
"step": 2755 |
|
}, |
|
{ |
|
"epoch": 1.5791130185979971, |
|
"grad_norm": 1.7108508348464966, |
|
"learning_rate": 4.009195402298851e-06, |
|
"loss": 0.1765, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 1.5819742489270388, |
|
"grad_norm": 1.863000750541687, |
|
"learning_rate": 3.997701149425287e-06, |
|
"loss": 0.2175, |
|
"step": 2765 |
|
}, |
|
{ |
|
"epoch": 1.5848354792560801, |
|
"grad_norm": 2.054055690765381, |
|
"learning_rate": 3.986206896551724e-06, |
|
"loss": 0.2081, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 1.5876967095851215, |
|
"grad_norm": 7.339261054992676, |
|
"learning_rate": 3.974712643678161e-06, |
|
"loss": 0.1797, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 1.590557939914163, |
|
"grad_norm": 1.821616530418396, |
|
"learning_rate": 3.963218390804598e-06, |
|
"loss": 0.1712, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 1.5934191702432046, |
|
"grad_norm": 4.537174701690674, |
|
"learning_rate": 3.951724137931035e-06, |
|
"loss": 0.2074, |
|
"step": 2785 |
|
}, |
|
{ |
|
"epoch": 1.5962804005722462, |
|
"grad_norm": 1.9394702911376953, |
|
"learning_rate": 3.940229885057472e-06, |
|
"loss": 0.1899, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 1.5991416309012876, |
|
"grad_norm": 2.021707773208618, |
|
"learning_rate": 3.9287356321839085e-06, |
|
"loss": 0.2004, |
|
"step": 2795 |
|
}, |
|
{ |
|
"epoch": 1.602002861230329, |
|
"grad_norm": 2.337878942489624, |
|
"learning_rate": 3.917241379310345e-06, |
|
"loss": 0.2254, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.6048640915593704, |
|
"grad_norm": 2.3269996643066406, |
|
"learning_rate": 3.905747126436782e-06, |
|
"loss": 0.218, |
|
"step": 2805 |
|
}, |
|
{ |
|
"epoch": 1.607725321888412, |
|
"grad_norm": 1.7878772020339966, |
|
"learning_rate": 3.894252873563218e-06, |
|
"loss": 0.1816, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 1.6105865522174536, |
|
"grad_norm": 1.7744303941726685, |
|
"learning_rate": 3.882758620689656e-06, |
|
"loss": 0.1694, |
|
"step": 2815 |
|
}, |
|
{ |
|
"epoch": 1.613447782546495, |
|
"grad_norm": 1.849387288093567, |
|
"learning_rate": 3.871264367816092e-06, |
|
"loss": 0.1892, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 1.6163090128755364, |
|
"grad_norm": 2.10662841796875, |
|
"learning_rate": 3.85977011494253e-06, |
|
"loss": 0.1889, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 1.6191702432045778, |
|
"grad_norm": 1.9940298795700073, |
|
"learning_rate": 3.848275862068966e-06, |
|
"loss": 0.2051, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 1.6220314735336194, |
|
"grad_norm": 1.9454128742218018, |
|
"learning_rate": 3.836781609195403e-06, |
|
"loss": 0.204, |
|
"step": 2835 |
|
}, |
|
{ |
|
"epoch": 1.624892703862661, |
|
"grad_norm": 1.6831291913986206, |
|
"learning_rate": 3.825287356321839e-06, |
|
"loss": 0.1882, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 1.6277539341917024, |
|
"grad_norm": 2.420912265777588, |
|
"learning_rate": 3.813793103448276e-06, |
|
"loss": 0.1869, |
|
"step": 2845 |
|
}, |
|
{ |
|
"epoch": 1.6306151645207438, |
|
"grad_norm": 1.7464041709899902, |
|
"learning_rate": 3.8022988505747126e-06, |
|
"loss": 0.1921, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.6334763948497855, |
|
"grad_norm": 1.9783605337142944, |
|
"learning_rate": 3.79080459770115e-06, |
|
"loss": 0.1892, |
|
"step": 2855 |
|
}, |
|
{ |
|
"epoch": 1.6363376251788269, |
|
"grad_norm": 1.7680976390838623, |
|
"learning_rate": 3.7793103448275866e-06, |
|
"loss": 0.1851, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 1.6391988555078685, |
|
"grad_norm": 2.4723477363586426, |
|
"learning_rate": 3.7678160919540236e-06, |
|
"loss": 0.2011, |
|
"step": 2865 |
|
}, |
|
{ |
|
"epoch": 1.6420600858369099, |
|
"grad_norm": 2.6624374389648438, |
|
"learning_rate": 3.75632183908046e-06, |
|
"loss": 0.191, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 1.6449213161659513, |
|
"grad_norm": 2.0972347259521484, |
|
"learning_rate": 3.7448275862068967e-06, |
|
"loss": 0.2108, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 1.647782546494993, |
|
"grad_norm": 2.13672137260437, |
|
"learning_rate": 3.7333333333333337e-06, |
|
"loss": 0.2003, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 1.6506437768240343, |
|
"grad_norm": 1.685014009475708, |
|
"learning_rate": 3.7218390804597703e-06, |
|
"loss": 0.1727, |
|
"step": 2885 |
|
}, |
|
{ |
|
"epoch": 1.653505007153076, |
|
"grad_norm": 2.269775390625, |
|
"learning_rate": 3.710344827586207e-06, |
|
"loss": 0.2263, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 1.6563662374821173, |
|
"grad_norm": 2.918632745742798, |
|
"learning_rate": 3.698850574712644e-06, |
|
"loss": 0.199, |
|
"step": 2895 |
|
}, |
|
{ |
|
"epoch": 1.6592274678111587, |
|
"grad_norm": 1.939340353012085, |
|
"learning_rate": 3.6873563218390805e-06, |
|
"loss": 0.1753, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.6620886981402003, |
|
"grad_norm": 2.1699063777923584, |
|
"learning_rate": 3.675862068965518e-06, |
|
"loss": 0.2076, |
|
"step": 2905 |
|
}, |
|
{ |
|
"epoch": 1.6649499284692417, |
|
"grad_norm": 5.3711466789245605, |
|
"learning_rate": 3.6643678160919545e-06, |
|
"loss": 0.1819, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 1.6678111587982833, |
|
"grad_norm": 12.513273239135742, |
|
"learning_rate": 3.652873563218391e-06, |
|
"loss": 0.1815, |
|
"step": 2915 |
|
}, |
|
{ |
|
"epoch": 1.6706723891273247, |
|
"grad_norm": 1.9002474546432495, |
|
"learning_rate": 3.641379310344828e-06, |
|
"loss": 0.2064, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 1.6735336194563661, |
|
"grad_norm": 2.12951922416687, |
|
"learning_rate": 3.6298850574712646e-06, |
|
"loss": 0.2099, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 1.6763948497854078, |
|
"grad_norm": 2.566774845123291, |
|
"learning_rate": 3.6183908045977012e-06, |
|
"loss": 0.1969, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 1.6792560801144494, |
|
"grad_norm": 1.7637192010879517, |
|
"learning_rate": 3.6068965517241382e-06, |
|
"loss": 0.1889, |
|
"step": 2935 |
|
}, |
|
{ |
|
"epoch": 1.6821173104434908, |
|
"grad_norm": 1.8705408573150635, |
|
"learning_rate": 3.595402298850575e-06, |
|
"loss": 0.1916, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 1.6849785407725322, |
|
"grad_norm": 3.1661245822906494, |
|
"learning_rate": 3.5839080459770122e-06, |
|
"loss": 0.1919, |
|
"step": 2945 |
|
}, |
|
{ |
|
"epoch": 1.6878397711015736, |
|
"grad_norm": 2.1932051181793213, |
|
"learning_rate": 3.5724137931034484e-06, |
|
"loss": 0.1978, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 1.6907010014306152, |
|
"grad_norm": 1.9052879810333252, |
|
"learning_rate": 3.560919540229885e-06, |
|
"loss": 0.1745, |
|
"step": 2955 |
|
}, |
|
{ |
|
"epoch": 1.6935622317596568, |
|
"grad_norm": 2.185124397277832, |
|
"learning_rate": 3.5494252873563224e-06, |
|
"loss": 0.2084, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 1.6964234620886982, |
|
"grad_norm": 1.7581264972686768, |
|
"learning_rate": 3.537931034482759e-06, |
|
"loss": 0.1929, |
|
"step": 2965 |
|
}, |
|
{ |
|
"epoch": 1.6992846924177396, |
|
"grad_norm": 1.7022705078125, |
|
"learning_rate": 3.5264367816091956e-06, |
|
"loss": 0.201, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 1.702145922746781, |
|
"grad_norm": 2.636112928390503, |
|
"learning_rate": 3.5149425287356326e-06, |
|
"loss": 0.2293, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 1.7050071530758226, |
|
"grad_norm": 1.6399792432785034, |
|
"learning_rate": 3.503448275862069e-06, |
|
"loss": 0.2029, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 1.7078683834048642, |
|
"grad_norm": 1.7805216312408447, |
|
"learning_rate": 3.491954022988506e-06, |
|
"loss": 0.1911, |
|
"step": 2985 |
|
}, |
|
{ |
|
"epoch": 1.7107296137339056, |
|
"grad_norm": 3.3004565238952637, |
|
"learning_rate": 3.4804597701149427e-06, |
|
"loss": 0.1818, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 1.713590844062947, |
|
"grad_norm": 2.374055862426758, |
|
"learning_rate": 3.4689655172413793e-06, |
|
"loss": 0.1959, |
|
"step": 2995 |
|
}, |
|
{ |
|
"epoch": 1.7164520743919884, |
|
"grad_norm": 1.8845436573028564, |
|
"learning_rate": 3.4574712643678167e-06, |
|
"loss": 0.17, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.7164520743919884, |
|
"eval_bleu": 0.6118896028399405, |
|
"eval_cer": 14.497737420959572, |
|
"eval_loss": 0.5691251754760742, |
|
"eval_runtime": 456.0743, |
|
"eval_samples_per_second": 2.901, |
|
"eval_steps_per_second": 0.364, |
|
"eval_wer": 27.064476999160213, |
|
"step": 3000 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 4500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.07563909152768e+20, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|