{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.030090270812437, "eval_steps": 1000, "global_step": 5000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05015045135406219, "grad_norm": 7.437633514404297, "learning_rate": 5.000000000000001e-07, "loss": 0.9756, "step": 25 }, { "epoch": 0.10030090270812438, "grad_norm": 3.9726901054382324, "learning_rate": 1.0000000000000002e-06, "loss": 0.8322, "step": 50 }, { "epoch": 0.15045135406218657, "grad_norm": 3.629199743270874, "learning_rate": 1.5e-06, "loss": 0.5489, "step": 75 }, { "epoch": 0.20060180541624875, "grad_norm": 3.2304935455322266, "learning_rate": 2.0000000000000003e-06, "loss": 0.4407, "step": 100 }, { "epoch": 0.25075225677031093, "grad_norm": 3.2816317081451416, "learning_rate": 2.5e-06, "loss": 0.4424, "step": 125 }, { "epoch": 0.30090270812437314, "grad_norm": 3.1936333179473877, "learning_rate": 3e-06, "loss": 0.4074, "step": 150 }, { "epoch": 0.3510531594784353, "grad_norm": 3.1005301475524902, "learning_rate": 3.5e-06, "loss": 0.4104, "step": 175 }, { "epoch": 0.4012036108324975, "grad_norm": 3.329033851623535, "learning_rate": 4.000000000000001e-06, "loss": 0.4049, "step": 200 }, { "epoch": 0.45135406218655966, "grad_norm": 2.9963624477386475, "learning_rate": 4.5e-06, "loss": 0.3773, "step": 225 }, { "epoch": 0.5015045135406219, "grad_norm": 2.877650260925293, "learning_rate": 5e-06, "loss": 0.3659, "step": 250 }, { "epoch": 0.551654964894684, "grad_norm": 3.1805803775787354, "learning_rate": 5.500000000000001e-06, "loss": 0.3686, "step": 275 }, { "epoch": 0.6018054162487463, "grad_norm": 2.518958806991577, "learning_rate": 6e-06, "loss": 0.362, "step": 300 }, { "epoch": 0.6519558676028084, "grad_norm": 2.588172674179077, "learning_rate": 6.5000000000000004e-06, "loss": 0.3603, "step": 325 }, { "epoch": 0.7021063189568706, "grad_norm": 3.022862672805786, "learning_rate": 7e-06, "loss": 0.3835, "step": 350 }, { "epoch": 0.7522567703109327, "grad_norm": 2.3474695682525635, "learning_rate": 7.500000000000001e-06, "loss": 0.3567, "step": 375 }, { "epoch": 0.802407221664995, "grad_norm": 2.847740411758423, "learning_rate": 8.000000000000001e-06, "loss": 0.3566, "step": 400 }, { "epoch": 0.8525576730190572, "grad_norm": 2.8755927085876465, "learning_rate": 8.5e-06, "loss": 0.3552, "step": 425 }, { "epoch": 0.9027081243731193, "grad_norm": 2.488140106201172, "learning_rate": 9e-06, "loss": 0.3574, "step": 450 }, { "epoch": 0.9528585757271816, "grad_norm": 3.7667624950408936, "learning_rate": 9.5e-06, "loss": 0.34, "step": 475 }, { "epoch": 1.0030090270812437, "grad_norm": 2.2906906604766846, "learning_rate": 1e-05, "loss": 0.332, "step": 500 }, { "epoch": 1.053159478435306, "grad_norm": 2.970755100250244, "learning_rate": 9.944444444444445e-06, "loss": 0.245, "step": 525 }, { "epoch": 1.103309929789368, "grad_norm": 2.735118865966797, "learning_rate": 9.88888888888889e-06, "loss": 0.2632, "step": 550 }, { "epoch": 1.1534603811434303, "grad_norm": 2.1645102500915527, "learning_rate": 9.833333333333333e-06, "loss": 0.2485, "step": 575 }, { "epoch": 1.2036108324974926, "grad_norm": 2.3044114112854004, "learning_rate": 9.777777777777779e-06, "loss": 0.2545, "step": 600 }, { "epoch": 1.2537612838515546, "grad_norm": 2.445683240890503, "learning_rate": 9.722222222222223e-06, "loss": 0.2398, "step": 625 }, { "epoch": 1.3039117352056169, "grad_norm": 1.9902832508087158, "learning_rate": 9.666666666666667e-06, "loss": 0.2374, "step": 650 }, { "epoch": 1.3540621865596791, "grad_norm": 2.571229934692383, "learning_rate": 9.611111111111112e-06, "loss": 0.2532, "step": 675 }, { "epoch": 1.4042126379137412, "grad_norm": 2.24031662940979, "learning_rate": 9.555555555555556e-06, "loss": 0.2466, "step": 700 }, { "epoch": 1.4543630892678034, "grad_norm": 2.2203283309936523, "learning_rate": 9.5e-06, "loss": 0.2414, "step": 725 }, { "epoch": 1.5045135406218657, "grad_norm": 2.821672201156616, "learning_rate": 9.444444444444445e-06, "loss": 0.2612, "step": 750 }, { "epoch": 1.5546639919759278, "grad_norm": 2.279815196990967, "learning_rate": 9.38888888888889e-06, "loss": 0.2361, "step": 775 }, { "epoch": 1.60481444332999, "grad_norm": 2.2528128623962402, "learning_rate": 9.333333333333334e-06, "loss": 0.2337, "step": 800 }, { "epoch": 1.6549648946840523, "grad_norm": 2.5302581787109375, "learning_rate": 9.277777777777778e-06, "loss": 0.2407, "step": 825 }, { "epoch": 1.7051153460381143, "grad_norm": 2.0469911098480225, "learning_rate": 9.222222222222224e-06, "loss": 0.2603, "step": 850 }, { "epoch": 1.7552657973921764, "grad_norm": 2.5774879455566406, "learning_rate": 9.166666666666666e-06, "loss": 0.2441, "step": 875 }, { "epoch": 1.8054162487462388, "grad_norm": 2.362243175506592, "learning_rate": 9.111111111111112e-06, "loss": 0.2315, "step": 900 }, { "epoch": 1.855566700100301, "grad_norm": 2.5620100498199463, "learning_rate": 9.055555555555556e-06, "loss": 0.2388, "step": 925 }, { "epoch": 1.905717151454363, "grad_norm": 2.1473169326782227, "learning_rate": 9e-06, "loss": 0.23, "step": 950 }, { "epoch": 1.9558676028084254, "grad_norm": 2.693861722946167, "learning_rate": 8.944444444444446e-06, "loss": 0.2491, "step": 975 }, { "epoch": 2.0060180541624875, "grad_norm": 1.6720950603485107, "learning_rate": 8.888888888888888e-06, "loss": 0.2144, "step": 1000 }, { "epoch": 2.0060180541624875, "eval_loss": 0.3075518310070038, "eval_runtime": 4546.1548, "eval_samples_per_second": 2.367, "eval_steps_per_second": 0.148, "eval_wer": 0.2114135961824298, "step": 1000 }, { "epoch": 2.0561685055165495, "grad_norm": 2.0117599964141846, "learning_rate": 8.833333333333334e-06, "loss": 0.1061, "step": 1025 }, { "epoch": 2.106318956870612, "grad_norm": 1.9226505756378174, "learning_rate": 8.777777777777778e-06, "loss": 0.1171, "step": 1050 }, { "epoch": 2.156469408224674, "grad_norm": 2.096928358078003, "learning_rate": 8.722222222222224e-06, "loss": 0.1135, "step": 1075 }, { "epoch": 2.206619859578736, "grad_norm": 1.7984790802001953, "learning_rate": 8.666666666666668e-06, "loss": 0.1164, "step": 1100 }, { "epoch": 2.2567703109327986, "grad_norm": 1.6253271102905273, "learning_rate": 8.611111111111112e-06, "loss": 0.1136, "step": 1125 }, { "epoch": 2.3069207622868606, "grad_norm": 1.7676957845687866, "learning_rate": 8.555555555555556e-06, "loss": 0.1207, "step": 1150 }, { "epoch": 2.3570712136409226, "grad_norm": 2.040688991546631, "learning_rate": 8.5e-06, "loss": 0.1148, "step": 1175 }, { "epoch": 2.407221664994985, "grad_norm": 2.272294282913208, "learning_rate": 8.444444444444446e-06, "loss": 0.1222, "step": 1200 }, { "epoch": 2.457372116349047, "grad_norm": 2.0784871578216553, "learning_rate": 8.38888888888889e-06, "loss": 0.1188, "step": 1225 }, { "epoch": 2.507522567703109, "grad_norm": 1.869183897972107, "learning_rate": 8.333333333333334e-06, "loss": 0.1187, "step": 1250 }, { "epoch": 2.5576730190571713, "grad_norm": 2.0832390785217285, "learning_rate": 8.277777777777778e-06, "loss": 0.1176, "step": 1275 }, { "epoch": 2.6078234704112337, "grad_norm": 1.9388813972473145, "learning_rate": 8.222222222222222e-06, "loss": 0.1146, "step": 1300 }, { "epoch": 2.657973921765296, "grad_norm": 1.659645676612854, "learning_rate": 8.166666666666668e-06, "loss": 0.116, "step": 1325 }, { "epoch": 2.7081243731193583, "grad_norm": 1.7133307456970215, "learning_rate": 8.111111111111112e-06, "loss": 0.1201, "step": 1350 }, { "epoch": 2.7582748244734203, "grad_norm": 2.0739212036132812, "learning_rate": 8.055555555555557e-06, "loss": 0.1177, "step": 1375 }, { "epoch": 2.8084252758274824, "grad_norm": 1.760362148284912, "learning_rate": 8.000000000000001e-06, "loss": 0.1071, "step": 1400 }, { "epoch": 2.8585757271815444, "grad_norm": 1.979827880859375, "learning_rate": 7.944444444444445e-06, "loss": 0.1141, "step": 1425 }, { "epoch": 2.908726178535607, "grad_norm": 1.8925753831863403, "learning_rate": 7.88888888888889e-06, "loss": 0.1272, "step": 1450 }, { "epoch": 2.958876629889669, "grad_norm": 1.9880926609039307, "learning_rate": 7.833333333333333e-06, "loss": 0.1189, "step": 1475 }, { "epoch": 3.0090270812437314, "grad_norm": 1.2164592742919922, "learning_rate": 7.77777777777778e-06, "loss": 0.1091, "step": 1500 }, { "epoch": 3.0591775325977935, "grad_norm": 1.2302759885787964, "learning_rate": 7.722222222222223e-06, "loss": 0.0472, "step": 1525 }, { "epoch": 3.1093279839518555, "grad_norm": 1.2639251947402954, "learning_rate": 7.666666666666667e-06, "loss": 0.048, "step": 1550 }, { "epoch": 3.1594784353059175, "grad_norm": 1.2657603025436401, "learning_rate": 7.611111111111111e-06, "loss": 0.0488, "step": 1575 }, { "epoch": 3.20962888665998, "grad_norm": 1.3162034749984741, "learning_rate": 7.555555555555556e-06, "loss": 0.0486, "step": 1600 }, { "epoch": 3.259779338014042, "grad_norm": 1.4205294847488403, "learning_rate": 7.500000000000001e-06, "loss": 0.0484, "step": 1625 }, { "epoch": 3.3099297893681046, "grad_norm": 1.3729196786880493, "learning_rate": 7.444444444444445e-06, "loss": 0.0512, "step": 1650 }, { "epoch": 3.3600802407221666, "grad_norm": 1.333731770515442, "learning_rate": 7.38888888888889e-06, "loss": 0.05, "step": 1675 }, { "epoch": 3.4102306920762286, "grad_norm": 1.2979406118392944, "learning_rate": 7.333333333333333e-06, "loss": 0.0535, "step": 1700 }, { "epoch": 3.4603811434302907, "grad_norm": 1.1790673732757568, "learning_rate": 7.277777777777778e-06, "loss": 0.0598, "step": 1725 }, { "epoch": 3.510531594784353, "grad_norm": 1.7529319524765015, "learning_rate": 7.222222222222223e-06, "loss": 0.0539, "step": 1750 }, { "epoch": 3.560682046138415, "grad_norm": 1.5352625846862793, "learning_rate": 7.166666666666667e-06, "loss": 0.0494, "step": 1775 }, { "epoch": 3.6108324974924777, "grad_norm": 1.707581639289856, "learning_rate": 7.111111111111112e-06, "loss": 0.051, "step": 1800 }, { "epoch": 3.6609829488465397, "grad_norm": 1.3295681476593018, "learning_rate": 7.055555555555557e-06, "loss": 0.0545, "step": 1825 }, { "epoch": 3.711133400200602, "grad_norm": 1.3010529279708862, "learning_rate": 7e-06, "loss": 0.0534, "step": 1850 }, { "epoch": 3.761283851554664, "grad_norm": 1.0611276626586914, "learning_rate": 6.944444444444445e-06, "loss": 0.0509, "step": 1875 }, { "epoch": 3.8114343029087263, "grad_norm": 1.1071949005126953, "learning_rate": 6.88888888888889e-06, "loss": 0.0496, "step": 1900 }, { "epoch": 3.8615847542627884, "grad_norm": 1.507038950920105, "learning_rate": 6.833333333333334e-06, "loss": 0.0494, "step": 1925 }, { "epoch": 3.9117352056168504, "grad_norm": 1.2737189531326294, "learning_rate": 6.777777777777779e-06, "loss": 0.052, "step": 1950 }, { "epoch": 3.961885656970913, "grad_norm": 1.8823206424713135, "learning_rate": 6.7222222222222235e-06, "loss": 0.053, "step": 1975 }, { "epoch": 4.012036108324975, "grad_norm": 0.7559254765510559, "learning_rate": 6.666666666666667e-06, "loss": 0.0472, "step": 2000 }, { "epoch": 4.012036108324975, "eval_loss": 0.33296459913253784, "eval_runtime": 4527.5772, "eval_samples_per_second": 2.377, "eval_steps_per_second": 0.149, "eval_wer": 0.20142963310646952, "step": 2000 }, { "epoch": 4.062186559679037, "grad_norm": 0.8223534822463989, "learning_rate": 6.6111111111111115e-06, "loss": 0.0218, "step": 2025 }, { "epoch": 4.112337011033099, "grad_norm": 0.7893447279930115, "learning_rate": 6.555555555555556e-06, "loss": 0.0201, "step": 2050 }, { "epoch": 4.162487462387162, "grad_norm": 0.657762348651886, "learning_rate": 6.5000000000000004e-06, "loss": 0.0203, "step": 2075 }, { "epoch": 4.212637913741224, "grad_norm": 0.9496999979019165, "learning_rate": 6.444444444444445e-06, "loss": 0.018, "step": 2100 }, { "epoch": 4.262788365095286, "grad_norm": 1.3166756629943848, "learning_rate": 6.3888888888888885e-06, "loss": 0.0203, "step": 2125 }, { "epoch": 4.312938816449348, "grad_norm": 0.8114544749259949, "learning_rate": 6.333333333333333e-06, "loss": 0.022, "step": 2150 }, { "epoch": 4.36308926780341, "grad_norm": 1.0165122747421265, "learning_rate": 6.277777777777778e-06, "loss": 0.0188, "step": 2175 }, { "epoch": 4.413239719157472, "grad_norm": 0.8669913411140442, "learning_rate": 6.222222222222223e-06, "loss": 0.0204, "step": 2200 }, { "epoch": 4.463390170511534, "grad_norm": 1.483972430229187, "learning_rate": 6.166666666666667e-06, "loss": 0.0199, "step": 2225 }, { "epoch": 4.513540621865597, "grad_norm": 1.1734397411346436, "learning_rate": 6.111111111111112e-06, "loss": 0.0194, "step": 2250 }, { "epoch": 4.563691073219659, "grad_norm": 0.5651412606239319, "learning_rate": 6.055555555555555e-06, "loss": 0.0219, "step": 2275 }, { "epoch": 4.613841524573721, "grad_norm": 0.9663107395172119, "learning_rate": 6e-06, "loss": 0.0215, "step": 2300 }, { "epoch": 4.663991975927783, "grad_norm": 0.6868607401847839, "learning_rate": 5.944444444444445e-06, "loss": 0.0223, "step": 2325 }, { "epoch": 4.714142427281845, "grad_norm": 1.138351321220398, "learning_rate": 5.88888888888889e-06, "loss": 0.0215, "step": 2350 }, { "epoch": 4.764292878635907, "grad_norm": 0.7841187119483948, "learning_rate": 5.833333333333334e-06, "loss": 0.0207, "step": 2375 }, { "epoch": 4.81444332998997, "grad_norm": 0.6683496236801147, "learning_rate": 5.777777777777778e-06, "loss": 0.0201, "step": 2400 }, { "epoch": 4.864593781344032, "grad_norm": 1.2674401998519897, "learning_rate": 5.722222222222222e-06, "loss": 0.0196, "step": 2425 }, { "epoch": 4.914744232698094, "grad_norm": 0.8776654601097107, "learning_rate": 5.666666666666667e-06, "loss": 0.0214, "step": 2450 }, { "epoch": 4.964894684052156, "grad_norm": 0.8897686004638672, "learning_rate": 5.611111111111112e-06, "loss": 0.0201, "step": 2475 }, { "epoch": 5.015045135406218, "grad_norm": 0.6467758417129517, "learning_rate": 5.555555555555557e-06, "loss": 0.0176, "step": 2500 }, { "epoch": 5.0651955867602805, "grad_norm": 0.8919677734375, "learning_rate": 5.500000000000001e-06, "loss": 0.0099, "step": 2525 }, { "epoch": 5.115346038114343, "grad_norm": 0.23880383372306824, "learning_rate": 5.444444444444445e-06, "loss": 0.0089, "step": 2550 }, { "epoch": 5.1654964894684054, "grad_norm": 0.7303509712219238, "learning_rate": 5.388888888888889e-06, "loss": 0.0093, "step": 2575 }, { "epoch": 5.2156469408224675, "grad_norm": 0.8609201312065125, "learning_rate": 5.333333333333334e-06, "loss": 0.0083, "step": 2600 }, { "epoch": 5.2657973921765295, "grad_norm": 0.4490683078765869, "learning_rate": 5.2777777777777785e-06, "loss": 0.009, "step": 2625 }, { "epoch": 5.315947843530592, "grad_norm": 0.5905369520187378, "learning_rate": 5.2222222222222226e-06, "loss": 0.009, "step": 2650 }, { "epoch": 5.366098294884654, "grad_norm": 0.18808700144290924, "learning_rate": 5.1666666666666675e-06, "loss": 0.0075, "step": 2675 }, { "epoch": 5.4162487462387165, "grad_norm": 0.8524003624916077, "learning_rate": 5.1111111111111115e-06, "loss": 0.0084, "step": 2700 }, { "epoch": 5.466399197592779, "grad_norm": 0.31983518600463867, "learning_rate": 5.0555555555555555e-06, "loss": 0.0081, "step": 2725 }, { "epoch": 5.516549648946841, "grad_norm": 0.40100687742233276, "learning_rate": 5e-06, "loss": 0.0077, "step": 2750 }, { "epoch": 5.566700100300903, "grad_norm": 1.5288898944854736, "learning_rate": 4.944444444444445e-06, "loss": 0.0077, "step": 2775 }, { "epoch": 5.616850551654965, "grad_norm": 0.15371347963809967, "learning_rate": 4.888888888888889e-06, "loss": 0.0068, "step": 2800 }, { "epoch": 5.667001003009027, "grad_norm": 0.547875702381134, "learning_rate": 4.833333333333333e-06, "loss": 0.0078, "step": 2825 }, { "epoch": 5.717151454363089, "grad_norm": 0.3743741512298584, "learning_rate": 4.777777777777778e-06, "loss": 0.0105, "step": 2850 }, { "epoch": 5.767301905717152, "grad_norm": 1.20223867893219, "learning_rate": 4.722222222222222e-06, "loss": 0.0084, "step": 2875 }, { "epoch": 5.817452357071214, "grad_norm": 0.4010525047779083, "learning_rate": 4.666666666666667e-06, "loss": 0.0074, "step": 2900 }, { "epoch": 5.867602808425276, "grad_norm": 0.2383079081773758, "learning_rate": 4.611111111111112e-06, "loss": 0.0097, "step": 2925 }, { "epoch": 5.917753259779338, "grad_norm": 0.9694074988365173, "learning_rate": 4.555555555555556e-06, "loss": 0.0082, "step": 2950 }, { "epoch": 5.9679037111334, "grad_norm": 0.4873412549495697, "learning_rate": 4.5e-06, "loss": 0.0094, "step": 2975 }, { "epoch": 6.018054162487463, "grad_norm": 0.35399332642555237, "learning_rate": 4.444444444444444e-06, "loss": 0.0074, "step": 3000 }, { "epoch": 6.018054162487463, "eval_loss": 0.3955759108066559, "eval_runtime": 4515.9024, "eval_samples_per_second": 2.383, "eval_steps_per_second": 0.149, "eval_wer": 0.19721505123992802, "step": 3000 }, { "epoch": 6.068204613841525, "grad_norm": 0.32993289828300476, "learning_rate": 4.388888888888889e-06, "loss": 0.005, "step": 3025 }, { "epoch": 6.118355065195587, "grad_norm": 2.4931156635284424, "learning_rate": 4.333333333333334e-06, "loss": 0.0047, "step": 3050 }, { "epoch": 6.168505516549649, "grad_norm": 0.09684254229068756, "learning_rate": 4.277777777777778e-06, "loss": 0.0039, "step": 3075 }, { "epoch": 6.218655967903711, "grad_norm": 0.1377803087234497, "learning_rate": 4.222222222222223e-06, "loss": 0.0041, "step": 3100 }, { "epoch": 6.268806419257773, "grad_norm": 0.12329312413930893, "learning_rate": 4.166666666666667e-06, "loss": 0.0036, "step": 3125 }, { "epoch": 6.318956870611835, "grad_norm": 0.39106011390686035, "learning_rate": 4.111111111111111e-06, "loss": 0.0038, "step": 3150 }, { "epoch": 6.369107321965898, "grad_norm": 0.21228019893169403, "learning_rate": 4.055555555555556e-06, "loss": 0.0034, "step": 3175 }, { "epoch": 6.41925777331996, "grad_norm": 0.13951948285102844, "learning_rate": 4.000000000000001e-06, "loss": 0.0035, "step": 3200 }, { "epoch": 6.469408224674022, "grad_norm": 0.15315498411655426, "learning_rate": 3.944444444444445e-06, "loss": 0.0045, "step": 3225 }, { "epoch": 6.519558676028084, "grad_norm": 0.09918154031038284, "learning_rate": 3.88888888888889e-06, "loss": 0.0037, "step": 3250 }, { "epoch": 6.569709127382146, "grad_norm": 0.128856360912323, "learning_rate": 3.833333333333334e-06, "loss": 0.0034, "step": 3275 }, { "epoch": 6.619859578736209, "grad_norm": 0.35564741492271423, "learning_rate": 3.777777777777778e-06, "loss": 0.0035, "step": 3300 }, { "epoch": 6.670010030090271, "grad_norm": 0.09503413736820221, "learning_rate": 3.7222222222222225e-06, "loss": 0.0047, "step": 3325 }, { "epoch": 6.720160481444333, "grad_norm": 0.2272273600101471, "learning_rate": 3.6666666666666666e-06, "loss": 0.004, "step": 3350 }, { "epoch": 6.770310932798395, "grad_norm": 0.31332728266716003, "learning_rate": 3.6111111111111115e-06, "loss": 0.0038, "step": 3375 }, { "epoch": 6.820461384152457, "grad_norm": 0.20381228625774384, "learning_rate": 3.555555555555556e-06, "loss": 0.0044, "step": 3400 }, { "epoch": 6.870611835506519, "grad_norm": 0.38565367460250854, "learning_rate": 3.5e-06, "loss": 0.0052, "step": 3425 }, { "epoch": 6.920762286860581, "grad_norm": 0.1613508015871048, "learning_rate": 3.444444444444445e-06, "loss": 0.0037, "step": 3450 }, { "epoch": 6.970912738214644, "grad_norm": 0.23183399438858032, "learning_rate": 3.3888888888888893e-06, "loss": 0.0042, "step": 3475 }, { "epoch": 7.021063189568706, "grad_norm": 0.09113147854804993, "learning_rate": 3.3333333333333333e-06, "loss": 0.0028, "step": 3500 }, { "epoch": 7.071213640922768, "grad_norm": 0.12553289532661438, "learning_rate": 3.277777777777778e-06, "loss": 0.0026, "step": 3525 }, { "epoch": 7.12136409227683, "grad_norm": 0.0897342711687088, "learning_rate": 3.2222222222222227e-06, "loss": 0.002, "step": 3550 }, { "epoch": 7.1715145436308925, "grad_norm": 0.07827088236808777, "learning_rate": 3.1666666666666667e-06, "loss": 0.0024, "step": 3575 }, { "epoch": 7.2216649949849545, "grad_norm": 0.055039647966623306, "learning_rate": 3.1111111111111116e-06, "loss": 0.0024, "step": 3600 }, { "epoch": 7.271815446339017, "grad_norm": 0.06994307041168213, "learning_rate": 3.055555555555556e-06, "loss": 0.0021, "step": 3625 }, { "epoch": 7.3219658976930795, "grad_norm": 0.10969334840774536, "learning_rate": 3e-06, "loss": 0.0023, "step": 3650 }, { "epoch": 7.3721163490471415, "grad_norm": 0.08186643570661545, "learning_rate": 2.944444444444445e-06, "loss": 0.0022, "step": 3675 }, { "epoch": 7.422266800401204, "grad_norm": 0.08872943371534348, "learning_rate": 2.888888888888889e-06, "loss": 0.0027, "step": 3700 }, { "epoch": 7.472417251755266, "grad_norm": 0.20957154035568237, "learning_rate": 2.8333333333333335e-06, "loss": 0.0027, "step": 3725 }, { "epoch": 7.522567703109328, "grad_norm": 0.09654885530471802, "learning_rate": 2.7777777777777783e-06, "loss": 0.0021, "step": 3750 }, { "epoch": 7.57271815446339, "grad_norm": 0.10487361997365952, "learning_rate": 2.7222222222222224e-06, "loss": 0.0041, "step": 3775 }, { "epoch": 7.622868605817453, "grad_norm": 0.06899241358041763, "learning_rate": 2.666666666666667e-06, "loss": 0.0025, "step": 3800 }, { "epoch": 7.673019057171515, "grad_norm": 0.06502360850572586, "learning_rate": 2.6111111111111113e-06, "loss": 0.0025, "step": 3825 }, { "epoch": 7.723169508525577, "grad_norm": 0.05213673412799835, "learning_rate": 2.5555555555555557e-06, "loss": 0.0026, "step": 3850 }, { "epoch": 7.773319959879639, "grad_norm": 0.11386407911777496, "learning_rate": 2.5e-06, "loss": 0.0023, "step": 3875 }, { "epoch": 7.823470411233701, "grad_norm": 0.08614542335271835, "learning_rate": 2.4444444444444447e-06, "loss": 0.002, "step": 3900 }, { "epoch": 7.873620862587764, "grad_norm": 0.12035714089870453, "learning_rate": 2.388888888888889e-06, "loss": 0.0021, "step": 3925 }, { "epoch": 7.923771313941826, "grad_norm": 0.0683644562959671, "learning_rate": 2.3333333333333336e-06, "loss": 0.0023, "step": 3950 }, { "epoch": 7.973921765295888, "grad_norm": 0.059945497661828995, "learning_rate": 2.277777777777778e-06, "loss": 0.002, "step": 3975 }, { "epoch": 8.02407221664995, "grad_norm": 0.050406791269779205, "learning_rate": 2.222222222222222e-06, "loss": 0.0019, "step": 4000 }, { "epoch": 8.02407221664995, "eval_loss": 0.4262579679489136, "eval_runtime": 4522.1238, "eval_samples_per_second": 2.38, "eval_steps_per_second": 0.149, "eval_wer": 0.1952691074082766, "step": 4000 }, { "epoch": 8.074222668004012, "grad_norm": 0.04868149757385254, "learning_rate": 2.166666666666667e-06, "loss": 0.0016, "step": 4025 }, { "epoch": 8.124373119358074, "grad_norm": 0.0494823083281517, "learning_rate": 2.1111111111111114e-06, "loss": 0.0018, "step": 4050 }, { "epoch": 8.174523570712136, "grad_norm": 0.05864572152495384, "learning_rate": 2.0555555555555555e-06, "loss": 0.0017, "step": 4075 }, { "epoch": 8.224674022066198, "grad_norm": 0.05070830136537552, "learning_rate": 2.0000000000000003e-06, "loss": 0.0016, "step": 4100 }, { "epoch": 8.27482447342026, "grad_norm": 0.06109990179538727, "learning_rate": 1.944444444444445e-06, "loss": 0.0017, "step": 4125 }, { "epoch": 8.324974924774324, "grad_norm": 0.045594800263643265, "learning_rate": 1.888888888888889e-06, "loss": 0.0017, "step": 4150 }, { "epoch": 8.375125376128386, "grad_norm": 0.049554213881492615, "learning_rate": 1.8333333333333333e-06, "loss": 0.0021, "step": 4175 }, { "epoch": 8.425275827482448, "grad_norm": 0.11533299088478088, "learning_rate": 1.777777777777778e-06, "loss": 0.0015, "step": 4200 }, { "epoch": 8.47542627883651, "grad_norm": 0.043509747833013535, "learning_rate": 1.7222222222222224e-06, "loss": 0.0018, "step": 4225 }, { "epoch": 8.525576730190572, "grad_norm": 0.14743666350841522, "learning_rate": 1.6666666666666667e-06, "loss": 0.002, "step": 4250 }, { "epoch": 8.575727181544634, "grad_norm": 0.08023034036159515, "learning_rate": 1.6111111111111113e-06, "loss": 0.0017, "step": 4275 }, { "epoch": 8.625877632898696, "grad_norm": 0.1176542118191719, "learning_rate": 1.5555555555555558e-06, "loss": 0.0019, "step": 4300 }, { "epoch": 8.676028084252758, "grad_norm": 0.05018528178334236, "learning_rate": 1.5e-06, "loss": 0.0018, "step": 4325 }, { "epoch": 8.72617853560682, "grad_norm": 0.051610466092824936, "learning_rate": 1.4444444444444445e-06, "loss": 0.0016, "step": 4350 }, { "epoch": 8.776328986960882, "grad_norm": 0.04370126128196716, "learning_rate": 1.3888888888888892e-06, "loss": 0.0016, "step": 4375 }, { "epoch": 8.826479438314944, "grad_norm": 0.06507682055234909, "learning_rate": 1.3333333333333334e-06, "loss": 0.0015, "step": 4400 }, { "epoch": 8.876629889669006, "grad_norm": 0.06819342076778412, "learning_rate": 1.2777777777777779e-06, "loss": 0.0019, "step": 4425 }, { "epoch": 8.926780341023068, "grad_norm": 0.06309027969837189, "learning_rate": 1.2222222222222223e-06, "loss": 0.0017, "step": 4450 }, { "epoch": 8.976930792377132, "grad_norm": 0.08104189485311508, "learning_rate": 1.1666666666666668e-06, "loss": 0.0015, "step": 4475 }, { "epoch": 9.027081243731194, "grad_norm": 0.04061448946595192, "learning_rate": 1.111111111111111e-06, "loss": 0.0015, "step": 4500 }, { "epoch": 9.077231695085256, "grad_norm": 0.04562969505786896, "learning_rate": 1.0555555555555557e-06, "loss": 0.0018, "step": 4525 }, { "epoch": 9.127382146439318, "grad_norm": 0.050402842462062836, "learning_rate": 1.0000000000000002e-06, "loss": 0.0015, "step": 4550 }, { "epoch": 9.17753259779338, "grad_norm": 0.5361291170120239, "learning_rate": 9.444444444444445e-07, "loss": 0.0019, "step": 4575 }, { "epoch": 9.227683049147442, "grad_norm": 0.0427754744887352, "learning_rate": 8.88888888888889e-07, "loss": 0.0015, "step": 4600 }, { "epoch": 9.277833500501504, "grad_norm": 0.04894596338272095, "learning_rate": 8.333333333333333e-07, "loss": 0.0015, "step": 4625 }, { "epoch": 9.327983951855567, "grad_norm": 0.04485371708869934, "learning_rate": 7.777777777777779e-07, "loss": 0.0014, "step": 4650 }, { "epoch": 9.378134403209629, "grad_norm": 0.04477512091398239, "learning_rate": 7.222222222222222e-07, "loss": 0.0015, "step": 4675 }, { "epoch": 9.42828485456369, "grad_norm": 0.04668520390987396, "learning_rate": 6.666666666666667e-07, "loss": 0.0015, "step": 4700 }, { "epoch": 9.478435305917753, "grad_norm": 0.05096197500824928, "learning_rate": 6.111111111111112e-07, "loss": 0.0017, "step": 4725 }, { "epoch": 9.528585757271815, "grad_norm": 0.04469752684235573, "learning_rate": 5.555555555555555e-07, "loss": 0.0015, "step": 4750 }, { "epoch": 9.578736208625877, "grad_norm": 0.050533875823020935, "learning_rate": 5.000000000000001e-07, "loss": 0.0014, "step": 4775 }, { "epoch": 9.62888665997994, "grad_norm": 0.04058687761425972, "learning_rate": 4.444444444444445e-07, "loss": 0.0015, "step": 4800 }, { "epoch": 9.679037111334003, "grad_norm": 0.05510278418660164, "learning_rate": 3.8888888888888895e-07, "loss": 0.0014, "step": 4825 }, { "epoch": 9.729187562688065, "grad_norm": 0.05432584136724472, "learning_rate": 3.3333333333333335e-07, "loss": 0.0013, "step": 4850 }, { "epoch": 9.779338014042127, "grad_norm": 0.08223077654838562, "learning_rate": 2.7777777777777776e-07, "loss": 0.0014, "step": 4875 }, { "epoch": 9.829488465396189, "grad_norm": 0.051733825355768204, "learning_rate": 2.2222222222222224e-07, "loss": 0.0014, "step": 4900 }, { "epoch": 9.87963891675025, "grad_norm": 0.03811487555503845, "learning_rate": 1.6666666666666668e-07, "loss": 0.0013, "step": 4925 }, { "epoch": 9.929789368104313, "grad_norm": 0.03971032425761223, "learning_rate": 1.1111111111111112e-07, "loss": 0.0013, "step": 4950 }, { "epoch": 9.979939819458375, "grad_norm": 0.0468224436044693, "learning_rate": 5.555555555555556e-08, "loss": 0.0016, "step": 4975 }, { "epoch": 10.030090270812437, "grad_norm": 0.04448791965842247, "learning_rate": 0.0, "loss": 0.0014, "step": 5000 }, { "epoch": 10.030090270812437, "eval_loss": 0.4447694718837738, "eval_runtime": 4528.3909, "eval_samples_per_second": 2.377, "eval_steps_per_second": 0.149, "eval_wer": 0.1960611749980443, "step": 5000 }, { "epoch": 10.030090270812437, "step": 5000, "total_flos": 5.43090104303616e+20, "train_loss": 0.0887994671344757, "train_runtime": 50408.8653, "train_samples_per_second": 3.174, "train_steps_per_second": 0.099 } ], "logging_steps": 25, "max_steps": 5000, "num_input_tokens_seen": 0, "num_train_epochs": 11, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.43090104303616e+20, "train_batch_size": 16, "trial_name": null, "trial_params": null }