{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.7017031630170316, "eval_steps": 500, "global_step": 7210, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 9.732360097323601e-05, "grad_norm": 16.13226202148005, "learning_rate": 3.2362459546925574e-08, "loss": 1.1997, "step": 1 }, { "epoch": 0.00019464720194647202, "grad_norm": 15.765097511926365, "learning_rate": 6.472491909385115e-08, "loss": 1.384, "step": 2 }, { "epoch": 0.00029197080291970805, "grad_norm": 16.64113665586635, "learning_rate": 9.70873786407767e-08, "loss": 1.2291, "step": 3 }, { "epoch": 0.00038929440389294404, "grad_norm": 20.34864047521242, "learning_rate": 1.294498381877023e-07, "loss": 0.9025, "step": 4 }, { "epoch": 0.00048661800486618007, "grad_norm": 28.710711096046108, "learning_rate": 1.6181229773462782e-07, "loss": 1.0305, "step": 5 }, { "epoch": 0.0005839416058394161, "grad_norm": 21.945801992582915, "learning_rate": 1.941747572815534e-07, "loss": 1.0979, "step": 6 }, { "epoch": 0.0006812652068126521, "grad_norm": 23.947905905644966, "learning_rate": 2.26537216828479e-07, "loss": 1.1909, "step": 7 }, { "epoch": 0.0007785888077858881, "grad_norm": 19.835016686730835, "learning_rate": 2.588996763754046e-07, "loss": 1.2083, "step": 8 }, { "epoch": 0.0008759124087591241, "grad_norm": 16.926846352507788, "learning_rate": 2.9126213592233014e-07, "loss": 1.2369, "step": 9 }, { "epoch": 0.0009732360097323601, "grad_norm": 21.349924470647284, "learning_rate": 3.2362459546925565e-07, "loss": 1.0052, "step": 10 }, { "epoch": 0.0010705596107055961, "grad_norm": 25.127579741628022, "learning_rate": 3.5598705501618125e-07, "loss": 1.2631, "step": 11 }, { "epoch": 0.0011678832116788322, "grad_norm": 12.524049131196549, "learning_rate": 3.883495145631068e-07, "loss": 1.0884, "step": 12 }, { "epoch": 0.001265206812652068, "grad_norm": 20.706648432487587, "learning_rate": 4.207119741100324e-07, "loss": 1.1469, "step": 13 }, { "epoch": 0.0013625304136253042, "grad_norm": 17.655230197318655, "learning_rate": 4.53074433656958e-07, "loss": 1.2922, "step": 14 }, { "epoch": 0.00145985401459854, "grad_norm": 16.550170455008725, "learning_rate": 4.854368932038835e-07, "loss": 1.1792, "step": 15 }, { "epoch": 0.0015571776155717761, "grad_norm": 24.456798845425887, "learning_rate": 5.177993527508092e-07, "loss": 1.0804, "step": 16 }, { "epoch": 0.0016545012165450122, "grad_norm": 14.659117460865279, "learning_rate": 5.501618122977346e-07, "loss": 1.0973, "step": 17 }, { "epoch": 0.0017518248175182481, "grad_norm": 15.324823146378344, "learning_rate": 5.825242718446603e-07, "loss": 0.9791, "step": 18 }, { "epoch": 0.0018491484184914842, "grad_norm": 12.483869597287145, "learning_rate": 6.148867313915858e-07, "loss": 1.0829, "step": 19 }, { "epoch": 0.0019464720194647203, "grad_norm": 11.921211994178957, "learning_rate": 6.472491909385113e-07, "loss": 0.6862, "step": 20 }, { "epoch": 0.0020437956204379564, "grad_norm": 14.53279456676939, "learning_rate": 6.79611650485437e-07, "loss": 0.7814, "step": 21 }, { "epoch": 0.0021411192214111923, "grad_norm": 15.68359520937104, "learning_rate": 7.119741100323625e-07, "loss": 0.883, "step": 22 }, { "epoch": 0.002238442822384428, "grad_norm": 14.062468532950906, "learning_rate": 7.443365695792882e-07, "loss": 1.0087, "step": 23 }, { "epoch": 0.0023357664233576644, "grad_norm": 11.150778403716444, "learning_rate": 7.766990291262136e-07, "loss": 0.4884, "step": 24 }, { "epoch": 0.0024330900243309003, "grad_norm": 7.740982223602688, "learning_rate": 8.090614886731392e-07, "loss": 0.8543, "step": 25 }, { "epoch": 0.002530413625304136, "grad_norm": 6.4338060169141915, "learning_rate": 8.414239482200648e-07, "loss": 0.7948, "step": 26 }, { "epoch": 0.002627737226277372, "grad_norm": 6.227022582398367, "learning_rate": 8.737864077669904e-07, "loss": 0.7814, "step": 27 }, { "epoch": 0.0027250608272506084, "grad_norm": 7.989531820662516, "learning_rate": 9.06148867313916e-07, "loss": 0.5645, "step": 28 }, { "epoch": 0.0028223844282238442, "grad_norm": 6.4745089193753, "learning_rate": 9.385113268608415e-07, "loss": 0.6802, "step": 29 }, { "epoch": 0.00291970802919708, "grad_norm": 8.23650018531745, "learning_rate": 9.70873786407767e-07, "loss": 0.6218, "step": 30 }, { "epoch": 0.0030170316301703164, "grad_norm": 4.915479010119541, "learning_rate": 1.0032362459546926e-06, "loss": 0.8879, "step": 31 }, { "epoch": 0.0031143552311435523, "grad_norm": 4.288138757396447, "learning_rate": 1.0355987055016184e-06, "loss": 0.5917, "step": 32 }, { "epoch": 0.003211678832116788, "grad_norm": 4.230901102531741, "learning_rate": 1.0679611650485437e-06, "loss": 0.7373, "step": 33 }, { "epoch": 0.0033090024330900245, "grad_norm": 4.714303656539792, "learning_rate": 1.1003236245954693e-06, "loss": 0.5886, "step": 34 }, { "epoch": 0.0034063260340632603, "grad_norm": 4.1204943469600925, "learning_rate": 1.132686084142395e-06, "loss": 0.5991, "step": 35 }, { "epoch": 0.0035036496350364962, "grad_norm": 3.124375547961107, "learning_rate": 1.1650485436893206e-06, "loss": 0.432, "step": 36 }, { "epoch": 0.0036009732360097325, "grad_norm": 3.741153837090354, "learning_rate": 1.197411003236246e-06, "loss": 0.6379, "step": 37 }, { "epoch": 0.0036982968369829684, "grad_norm": 3.7740270813504506, "learning_rate": 1.2297734627831717e-06, "loss": 0.5595, "step": 38 }, { "epoch": 0.0037956204379562043, "grad_norm": 4.783986424289694, "learning_rate": 1.2621359223300972e-06, "loss": 0.8717, "step": 39 }, { "epoch": 0.0038929440389294406, "grad_norm": 4.242597978097827, "learning_rate": 1.2944983818770226e-06, "loss": 0.6632, "step": 40 }, { "epoch": 0.0039902676399026765, "grad_norm": 4.309602952976607, "learning_rate": 1.3268608414239483e-06, "loss": 0.7191, "step": 41 }, { "epoch": 0.004087591240875913, "grad_norm": 4.136462382872819, "learning_rate": 1.359223300970874e-06, "loss": 0.6782, "step": 42 }, { "epoch": 0.004184914841849148, "grad_norm": 4.2148643401229, "learning_rate": 1.3915857605177997e-06, "loss": 0.8932, "step": 43 }, { "epoch": 0.0042822384428223845, "grad_norm": 3.829331188520966, "learning_rate": 1.423948220064725e-06, "loss": 0.4697, "step": 44 }, { "epoch": 0.004379562043795621, "grad_norm": 3.4564347781684557, "learning_rate": 1.4563106796116506e-06, "loss": 0.3377, "step": 45 }, { "epoch": 0.004476885644768856, "grad_norm": 3.319649807488789, "learning_rate": 1.4886731391585763e-06, "loss": 0.4589, "step": 46 }, { "epoch": 0.0045742092457420926, "grad_norm": 3.8856546910308034, "learning_rate": 1.5210355987055017e-06, "loss": 0.8413, "step": 47 }, { "epoch": 0.004671532846715329, "grad_norm": 3.7955924171570605, "learning_rate": 1.5533980582524272e-06, "loss": 0.588, "step": 48 }, { "epoch": 0.004768856447688564, "grad_norm": 4.5762685715882805, "learning_rate": 1.585760517799353e-06, "loss": 0.6472, "step": 49 }, { "epoch": 0.004866180048661801, "grad_norm": 4.284420204063246, "learning_rate": 1.6181229773462783e-06, "loss": 0.5233, "step": 50 }, { "epoch": 0.004963503649635037, "grad_norm": 4.0399534913964645, "learning_rate": 1.650485436893204e-06, "loss": 0.6737, "step": 51 }, { "epoch": 0.005060827250608272, "grad_norm": 4.850258079033273, "learning_rate": 1.6828478964401297e-06, "loss": 0.5017, "step": 52 }, { "epoch": 0.005158150851581509, "grad_norm": 3.289730774319516, "learning_rate": 1.715210355987055e-06, "loss": 0.6378, "step": 53 }, { "epoch": 0.005255474452554744, "grad_norm": 3.116783938182044, "learning_rate": 1.7475728155339808e-06, "loss": 0.5681, "step": 54 }, { "epoch": 0.00535279805352798, "grad_norm": 3.5896487509946677, "learning_rate": 1.7799352750809063e-06, "loss": 0.5222, "step": 55 }, { "epoch": 0.005450121654501217, "grad_norm": 3.3627737905222146, "learning_rate": 1.812297734627832e-06, "loss": 0.351, "step": 56 }, { "epoch": 0.005547445255474452, "grad_norm": 3.405981770724818, "learning_rate": 1.8446601941747574e-06, "loss": 0.5832, "step": 57 }, { "epoch": 0.0056447688564476885, "grad_norm": 3.231134680455488, "learning_rate": 1.877022653721683e-06, "loss": 0.5558, "step": 58 }, { "epoch": 0.005742092457420925, "grad_norm": 4.2963387449464605, "learning_rate": 1.9093851132686085e-06, "loss": 0.7544, "step": 59 }, { "epoch": 0.00583941605839416, "grad_norm": 3.3678084152804315, "learning_rate": 1.941747572815534e-06, "loss": 0.554, "step": 60 }, { "epoch": 0.0059367396593673965, "grad_norm": 3.635756089652443, "learning_rate": 1.9741100323624596e-06, "loss": 0.5312, "step": 61 }, { "epoch": 0.006034063260340633, "grad_norm": 3.91764256649437, "learning_rate": 2.006472491909385e-06, "loss": 0.4329, "step": 62 }, { "epoch": 0.006131386861313868, "grad_norm": 3.4866607421863565, "learning_rate": 2.0388349514563107e-06, "loss": 0.4453, "step": 63 }, { "epoch": 0.006228710462287105, "grad_norm": 2.9369425143161147, "learning_rate": 2.0711974110032367e-06, "loss": 0.467, "step": 64 }, { "epoch": 0.006326034063260341, "grad_norm": 3.0906723589687024, "learning_rate": 2.103559870550162e-06, "loss": 0.3917, "step": 65 }, { "epoch": 0.006423357664233576, "grad_norm": 3.5121616512799747, "learning_rate": 2.1359223300970874e-06, "loss": 0.6428, "step": 66 }, { "epoch": 0.006520681265206813, "grad_norm": 3.470270871630247, "learning_rate": 2.1682847896440134e-06, "loss": 0.586, "step": 67 }, { "epoch": 0.006618004866180049, "grad_norm": 2.8689679430782498, "learning_rate": 2.2006472491909385e-06, "loss": 0.2938, "step": 68 }, { "epoch": 0.006715328467153284, "grad_norm": 4.115573400175418, "learning_rate": 2.2330097087378645e-06, "loss": 0.3855, "step": 69 }, { "epoch": 0.006812652068126521, "grad_norm": 3.903319335204406, "learning_rate": 2.26537216828479e-06, "loss": 0.6272, "step": 70 }, { "epoch": 0.006909975669099757, "grad_norm": 2.649165320750572, "learning_rate": 2.297734627831715e-06, "loss": 0.5229, "step": 71 }, { "epoch": 0.0070072992700729924, "grad_norm": 2.8543884488184235, "learning_rate": 2.330097087378641e-06, "loss": 0.4006, "step": 72 }, { "epoch": 0.007104622871046229, "grad_norm": 2.9817247056794134, "learning_rate": 2.3624595469255667e-06, "loss": 0.2331, "step": 73 }, { "epoch": 0.007201946472019465, "grad_norm": 3.592880940053797, "learning_rate": 2.394822006472492e-06, "loss": 0.4889, "step": 74 }, { "epoch": 0.0072992700729927005, "grad_norm": 2.89844013224274, "learning_rate": 2.427184466019418e-06, "loss": 0.4711, "step": 75 }, { "epoch": 0.007396593673965937, "grad_norm": 2.6071345596032134, "learning_rate": 2.4595469255663434e-06, "loss": 0.4844, "step": 76 }, { "epoch": 0.007493917274939173, "grad_norm": 2.9053930844585776, "learning_rate": 2.491909385113269e-06, "loss": 0.5163, "step": 77 }, { "epoch": 0.0075912408759124085, "grad_norm": 3.4016540038418115, "learning_rate": 2.5242718446601945e-06, "loss": 0.5852, "step": 78 }, { "epoch": 0.007688564476885645, "grad_norm": 2.7133170026932887, "learning_rate": 2.55663430420712e-06, "loss": 0.4934, "step": 79 }, { "epoch": 0.007785888077858881, "grad_norm": 3.2321439410345585, "learning_rate": 2.588996763754045e-06, "loss": 0.62, "step": 80 }, { "epoch": 0.007883211678832117, "grad_norm": 2.6835948161160545, "learning_rate": 2.621359223300971e-06, "loss": 0.4689, "step": 81 }, { "epoch": 0.007980535279805353, "grad_norm": 4.716894934604404, "learning_rate": 2.6537216828478967e-06, "loss": 0.3364, "step": 82 }, { "epoch": 0.00807785888077859, "grad_norm": 2.6507857723180646, "learning_rate": 2.686084142394822e-06, "loss": 0.3785, "step": 83 }, { "epoch": 0.008175182481751826, "grad_norm": 2.356714630861861, "learning_rate": 2.718446601941748e-06, "loss": 0.2591, "step": 84 }, { "epoch": 0.00827250608272506, "grad_norm": 2.755477478688418, "learning_rate": 2.7508090614886734e-06, "loss": 0.4762, "step": 85 }, { "epoch": 0.008369829683698296, "grad_norm": 3.7771581783688837, "learning_rate": 2.7831715210355993e-06, "loss": 0.4627, "step": 86 }, { "epoch": 0.008467153284671533, "grad_norm": 2.8568450908810257, "learning_rate": 2.8155339805825245e-06, "loss": 0.4322, "step": 87 }, { "epoch": 0.008564476885644769, "grad_norm": 2.914756058289183, "learning_rate": 2.84789644012945e-06, "loss": 0.4835, "step": 88 }, { "epoch": 0.008661800486618005, "grad_norm": 2.414182197047686, "learning_rate": 2.880258899676376e-06, "loss": 0.493, "step": 89 }, { "epoch": 0.008759124087591242, "grad_norm": 2.8597853736106975, "learning_rate": 2.912621359223301e-06, "loss": 0.6063, "step": 90 }, { "epoch": 0.008856447688564476, "grad_norm": 2.4567808863650007, "learning_rate": 2.9449838187702267e-06, "loss": 0.5874, "step": 91 }, { "epoch": 0.008953771289537713, "grad_norm": 2.819434031784131, "learning_rate": 2.9773462783171527e-06, "loss": 0.552, "step": 92 }, { "epoch": 0.009051094890510949, "grad_norm": 1.9840396387462764, "learning_rate": 3.0097087378640778e-06, "loss": 0.3736, "step": 93 }, { "epoch": 0.009148418491484185, "grad_norm": 2.52047300259283, "learning_rate": 3.0420711974110033e-06, "loss": 0.407, "step": 94 }, { "epoch": 0.009245742092457421, "grad_norm": 3.140839526692518, "learning_rate": 3.0744336569579293e-06, "loss": 0.6513, "step": 95 }, { "epoch": 0.009343065693430658, "grad_norm": 3.1368865731879554, "learning_rate": 3.1067961165048544e-06, "loss": 0.4804, "step": 96 }, { "epoch": 0.009440389294403892, "grad_norm": 2.6987222968513196, "learning_rate": 3.13915857605178e-06, "loss": 0.4228, "step": 97 }, { "epoch": 0.009537712895377129, "grad_norm": 2.5779408707034026, "learning_rate": 3.171521035598706e-06, "loss": 0.4654, "step": 98 }, { "epoch": 0.009635036496350365, "grad_norm": 2.5189587792888934, "learning_rate": 3.2038834951456315e-06, "loss": 0.5465, "step": 99 }, { "epoch": 0.009732360097323601, "grad_norm": 2.457408493992738, "learning_rate": 3.2362459546925567e-06, "loss": 0.5077, "step": 100 }, { "epoch": 0.009829683698296838, "grad_norm": 2.445932328031196, "learning_rate": 3.2686084142394826e-06, "loss": 0.492, "step": 101 }, { "epoch": 0.009927007299270074, "grad_norm": 2.3199141960061915, "learning_rate": 3.300970873786408e-06, "loss": 0.4432, "step": 102 }, { "epoch": 0.010024330900243308, "grad_norm": 3.88769555780582, "learning_rate": 3.3333333333333333e-06, "loss": 0.3684, "step": 103 }, { "epoch": 0.010121654501216545, "grad_norm": 2.63905676146042, "learning_rate": 3.3656957928802593e-06, "loss": 0.4238, "step": 104 }, { "epoch": 0.010218978102189781, "grad_norm": 3.0073749174392885, "learning_rate": 3.398058252427185e-06, "loss": 0.4655, "step": 105 }, { "epoch": 0.010316301703163017, "grad_norm": 2.613524831872459, "learning_rate": 3.43042071197411e-06, "loss": 0.4948, "step": 106 }, { "epoch": 0.010413625304136254, "grad_norm": 2.4293628733346764, "learning_rate": 3.462783171521036e-06, "loss": 0.3717, "step": 107 }, { "epoch": 0.010510948905109488, "grad_norm": 3.3036504610837016, "learning_rate": 3.4951456310679615e-06, "loss": 0.4939, "step": 108 }, { "epoch": 0.010608272506082725, "grad_norm": 2.6808221933664846, "learning_rate": 3.5275080906148866e-06, "loss": 0.4809, "step": 109 }, { "epoch": 0.01070559610705596, "grad_norm": 2.853958419293739, "learning_rate": 3.5598705501618126e-06, "loss": 0.4066, "step": 110 }, { "epoch": 0.010802919708029197, "grad_norm": 5.3412930378250145, "learning_rate": 3.592233009708738e-06, "loss": 0.3599, "step": 111 }, { "epoch": 0.010900243309002433, "grad_norm": 2.983669976646381, "learning_rate": 3.624595469255664e-06, "loss": 0.6187, "step": 112 }, { "epoch": 0.01099756690997567, "grad_norm": 3.388543821878077, "learning_rate": 3.6569579288025893e-06, "loss": 0.717, "step": 113 }, { "epoch": 0.011094890510948904, "grad_norm": 3.0720120062792127, "learning_rate": 3.689320388349515e-06, "loss": 0.5057, "step": 114 }, { "epoch": 0.01119221411192214, "grad_norm": 2.521868238475485, "learning_rate": 3.721682847896441e-06, "loss": 0.4308, "step": 115 }, { "epoch": 0.011289537712895377, "grad_norm": 2.641085251645149, "learning_rate": 3.754045307443366e-06, "loss": 0.4047, "step": 116 }, { "epoch": 0.011386861313868613, "grad_norm": 2.6936547530255828, "learning_rate": 3.7864077669902915e-06, "loss": 0.5548, "step": 117 }, { "epoch": 0.01148418491484185, "grad_norm": 5.599830434139348, "learning_rate": 3.818770226537217e-06, "loss": 0.5338, "step": 118 }, { "epoch": 0.011581508515815086, "grad_norm": 2.6372065340185378, "learning_rate": 3.851132686084142e-06, "loss": 0.4833, "step": 119 }, { "epoch": 0.01167883211678832, "grad_norm": 2.555049765563167, "learning_rate": 3.883495145631068e-06, "loss": 0.4295, "step": 120 }, { "epoch": 0.011776155717761557, "grad_norm": 2.22725048478721, "learning_rate": 3.915857605177994e-06, "loss": 0.4074, "step": 121 }, { "epoch": 0.011873479318734793, "grad_norm": 3.0093045583939984, "learning_rate": 3.948220064724919e-06, "loss": 0.7168, "step": 122 }, { "epoch": 0.01197080291970803, "grad_norm": 2.8800338131191223, "learning_rate": 3.980582524271845e-06, "loss": 0.3826, "step": 123 }, { "epoch": 0.012068126520681266, "grad_norm": 2.3197904571086974, "learning_rate": 4.01294498381877e-06, "loss": 0.2584, "step": 124 }, { "epoch": 0.012165450121654502, "grad_norm": 2.929540360888414, "learning_rate": 4.045307443365696e-06, "loss": 0.4617, "step": 125 }, { "epoch": 0.012262773722627737, "grad_norm": 2.5602803735383137, "learning_rate": 4.0776699029126215e-06, "loss": 0.2561, "step": 126 }, { "epoch": 0.012360097323600973, "grad_norm": 2.676345297957673, "learning_rate": 4.1100323624595475e-06, "loss": 0.2996, "step": 127 }, { "epoch": 0.01245742092457421, "grad_norm": 1.9047794610871986, "learning_rate": 4.1423948220064734e-06, "loss": 0.3475, "step": 128 }, { "epoch": 0.012554744525547445, "grad_norm": 2.9014607006450555, "learning_rate": 4.1747572815533986e-06, "loss": 0.4748, "step": 129 }, { "epoch": 0.012652068126520682, "grad_norm": 2.2992367182815987, "learning_rate": 4.207119741100324e-06, "loss": 0.3465, "step": 130 }, { "epoch": 0.012749391727493918, "grad_norm": 2.668874383033437, "learning_rate": 4.23948220064725e-06, "loss": 0.6119, "step": 131 }, { "epoch": 0.012846715328467153, "grad_norm": 2.69106703615133, "learning_rate": 4.271844660194175e-06, "loss": 0.4743, "step": 132 }, { "epoch": 0.012944038929440389, "grad_norm": 2.972314561813759, "learning_rate": 4.304207119741101e-06, "loss": 0.5766, "step": 133 }, { "epoch": 0.013041362530413625, "grad_norm": 2.7487017428059635, "learning_rate": 4.336569579288027e-06, "loss": 0.5818, "step": 134 }, { "epoch": 0.013138686131386862, "grad_norm": 3.1117207482379663, "learning_rate": 4.368932038834952e-06, "loss": 0.6468, "step": 135 }, { "epoch": 0.013236009732360098, "grad_norm": 2.781796948090657, "learning_rate": 4.401294498381877e-06, "loss": 0.7209, "step": 136 }, { "epoch": 0.013333333333333334, "grad_norm": 2.5480533986327556, "learning_rate": 4.433656957928803e-06, "loss": 0.5907, "step": 137 }, { "epoch": 0.013430656934306569, "grad_norm": 2.054397852683208, "learning_rate": 4.466019417475729e-06, "loss": 0.4079, "step": 138 }, { "epoch": 0.013527980535279805, "grad_norm": 2.2564046621809037, "learning_rate": 4.498381877022654e-06, "loss": 0.4, "step": 139 }, { "epoch": 0.013625304136253041, "grad_norm": 2.8739841159071022, "learning_rate": 4.53074433656958e-06, "loss": 0.5819, "step": 140 }, { "epoch": 0.013722627737226278, "grad_norm": 2.6418540847993657, "learning_rate": 4.563106796116505e-06, "loss": 0.589, "step": 141 }, { "epoch": 0.013819951338199514, "grad_norm": 2.431908870746442, "learning_rate": 4.59546925566343e-06, "loss": 0.5468, "step": 142 }, { "epoch": 0.013917274939172749, "grad_norm": 4.44933942542394, "learning_rate": 4.627831715210356e-06, "loss": 0.3846, "step": 143 }, { "epoch": 0.014014598540145985, "grad_norm": 2.2469929628351126, "learning_rate": 4.660194174757282e-06, "loss": 0.3047, "step": 144 }, { "epoch": 0.014111922141119221, "grad_norm": 2.8361034502388205, "learning_rate": 4.6925566343042074e-06, "loss": 0.4186, "step": 145 }, { "epoch": 0.014209245742092457, "grad_norm": 2.485184255788147, "learning_rate": 4.724919093851133e-06, "loss": 0.455, "step": 146 }, { "epoch": 0.014306569343065694, "grad_norm": 2.677307495548506, "learning_rate": 4.7572815533980585e-06, "loss": 0.6346, "step": 147 }, { "epoch": 0.01440389294403893, "grad_norm": 2.9440091029213034, "learning_rate": 4.789644012944984e-06, "loss": 0.4961, "step": 148 }, { "epoch": 0.014501216545012165, "grad_norm": 2.6810327828724723, "learning_rate": 4.82200647249191e-06, "loss": 0.3754, "step": 149 }, { "epoch": 0.014598540145985401, "grad_norm": 2.519257002697837, "learning_rate": 4.854368932038836e-06, "loss": 0.249, "step": 150 }, { "epoch": 0.014695863746958637, "grad_norm": 2.8041238457488578, "learning_rate": 4.886731391585761e-06, "loss": 0.3117, "step": 151 }, { "epoch": 0.014793187347931874, "grad_norm": 2.363481194731433, "learning_rate": 4.919093851132687e-06, "loss": 0.3325, "step": 152 }, { "epoch": 0.01489051094890511, "grad_norm": 3.078347599868747, "learning_rate": 4.951456310679612e-06, "loss": 0.3569, "step": 153 }, { "epoch": 0.014987834549878346, "grad_norm": 3.2926461094535515, "learning_rate": 4.983818770226538e-06, "loss": 0.716, "step": 154 }, { "epoch": 0.01508515815085158, "grad_norm": 2.340052421830345, "learning_rate": 5.016181229773464e-06, "loss": 0.2642, "step": 155 }, { "epoch": 0.015182481751824817, "grad_norm": 1.8915730140906823, "learning_rate": 5.048543689320389e-06, "loss": 0.3523, "step": 156 }, { "epoch": 0.015279805352798053, "grad_norm": 4.2448533254564484, "learning_rate": 5.080906148867314e-06, "loss": 0.5185, "step": 157 }, { "epoch": 0.01537712895377129, "grad_norm": 2.1172922256300333, "learning_rate": 5.11326860841424e-06, "loss": 0.3341, "step": 158 }, { "epoch": 0.015474452554744526, "grad_norm": 2.7414250631657113, "learning_rate": 5.145631067961165e-06, "loss": 0.5965, "step": 159 }, { "epoch": 0.015571776155717762, "grad_norm": 1.977804344185745, "learning_rate": 5.17799352750809e-06, "loss": 0.239, "step": 160 }, { "epoch": 0.015669099756690997, "grad_norm": 2.771807640315191, "learning_rate": 5.210355987055017e-06, "loss": 0.4122, "step": 161 }, { "epoch": 0.015766423357664233, "grad_norm": 1.9977073642008174, "learning_rate": 5.242718446601942e-06, "loss": 0.3423, "step": 162 }, { "epoch": 0.01586374695863747, "grad_norm": 3.222730527079622, "learning_rate": 5.275080906148867e-06, "loss": 0.5647, "step": 163 }, { "epoch": 0.015961070559610706, "grad_norm": 2.95441646694508, "learning_rate": 5.307443365695793e-06, "loss": 0.5198, "step": 164 }, { "epoch": 0.016058394160583942, "grad_norm": 2.3346384576429116, "learning_rate": 5.3398058252427185e-06, "loss": 0.3516, "step": 165 }, { "epoch": 0.01615571776155718, "grad_norm": 2.089159587923689, "learning_rate": 5.372168284789644e-06, "loss": 0.3704, "step": 166 }, { "epoch": 0.016253041362530415, "grad_norm": 2.8135820638465088, "learning_rate": 5.4045307443365705e-06, "loss": 0.3729, "step": 167 }, { "epoch": 0.01635036496350365, "grad_norm": 2.991259557993277, "learning_rate": 5.436893203883496e-06, "loss": 0.5622, "step": 168 }, { "epoch": 0.016447688564476887, "grad_norm": 3.1512644455187857, "learning_rate": 5.4692556634304216e-06, "loss": 0.5915, "step": 169 }, { "epoch": 0.01654501216545012, "grad_norm": 2.616126184062516, "learning_rate": 5.501618122977347e-06, "loss": 0.4252, "step": 170 }, { "epoch": 0.016642335766423356, "grad_norm": 1.9958281517625203, "learning_rate": 5.533980582524272e-06, "loss": 0.3704, "step": 171 }, { "epoch": 0.016739659367396593, "grad_norm": 2.470731302334384, "learning_rate": 5.566343042071199e-06, "loss": 0.4373, "step": 172 }, { "epoch": 0.01683698296836983, "grad_norm": 2.583270308023139, "learning_rate": 5.598705501618124e-06, "loss": 0.4125, "step": 173 }, { "epoch": 0.016934306569343065, "grad_norm": 1.9644684632241667, "learning_rate": 5.631067961165049e-06, "loss": 0.3522, "step": 174 }, { "epoch": 0.0170316301703163, "grad_norm": 2.4207097357376046, "learning_rate": 5.663430420711975e-06, "loss": 0.3579, "step": 175 }, { "epoch": 0.017128953771289538, "grad_norm": 2.3511041847292034, "learning_rate": 5.6957928802589e-06, "loss": 0.5412, "step": 176 }, { "epoch": 0.017226277372262774, "grad_norm": 2.274427899539275, "learning_rate": 5.728155339805825e-06, "loss": 0.5353, "step": 177 }, { "epoch": 0.01732360097323601, "grad_norm": 2.133749284526256, "learning_rate": 5.760517799352752e-06, "loss": 0.4392, "step": 178 }, { "epoch": 0.017420924574209247, "grad_norm": 2.3097462109285787, "learning_rate": 5.792880258899677e-06, "loss": 0.4442, "step": 179 }, { "epoch": 0.017518248175182483, "grad_norm": 2.2128802818602056, "learning_rate": 5.825242718446602e-06, "loss": 0.5635, "step": 180 }, { "epoch": 0.017615571776155716, "grad_norm": 2.103405792854256, "learning_rate": 5.857605177993528e-06, "loss": 0.4533, "step": 181 }, { "epoch": 0.017712895377128952, "grad_norm": 2.0565661990183597, "learning_rate": 5.889967637540453e-06, "loss": 0.3806, "step": 182 }, { "epoch": 0.01781021897810219, "grad_norm": 2.179649872267064, "learning_rate": 5.9223300970873785e-06, "loss": 0.3842, "step": 183 }, { "epoch": 0.017907542579075425, "grad_norm": 3.8333244047199146, "learning_rate": 5.954692556634305e-06, "loss": 0.3876, "step": 184 }, { "epoch": 0.01800486618004866, "grad_norm": 2.2893517217095716, "learning_rate": 5.9870550161812304e-06, "loss": 0.4781, "step": 185 }, { "epoch": 0.018102189781021898, "grad_norm": 1.6022498167897639, "learning_rate": 6.0194174757281556e-06, "loss": 0.2306, "step": 186 }, { "epoch": 0.018199513381995134, "grad_norm": 2.32863493589546, "learning_rate": 6.0517799352750815e-06, "loss": 0.5139, "step": 187 }, { "epoch": 0.01829683698296837, "grad_norm": 2.0789478938631314, "learning_rate": 6.084142394822007e-06, "loss": 0.2824, "step": 188 }, { "epoch": 0.018394160583941607, "grad_norm": 1.7544615955949223, "learning_rate": 6.116504854368932e-06, "loss": 0.4172, "step": 189 }, { "epoch": 0.018491484184914843, "grad_norm": 1.931043696572374, "learning_rate": 6.148867313915859e-06, "loss": 0.3584, "step": 190 }, { "epoch": 0.01858880778588808, "grad_norm": 2.467258437370788, "learning_rate": 6.181229773462784e-06, "loss": 0.462, "step": 191 }, { "epoch": 0.018686131386861315, "grad_norm": 2.1541091684996965, "learning_rate": 6.213592233009709e-06, "loss": 0.3967, "step": 192 }, { "epoch": 0.01878345498783455, "grad_norm": 2.2330486922808395, "learning_rate": 6.245954692556635e-06, "loss": 0.5316, "step": 193 }, { "epoch": 0.018880778588807785, "grad_norm": 2.3498262097642395, "learning_rate": 6.27831715210356e-06, "loss": 0.4815, "step": 194 }, { "epoch": 0.01897810218978102, "grad_norm": 1.7045092076002246, "learning_rate": 6.310679611650487e-06, "loss": 0.3, "step": 195 }, { "epoch": 0.019075425790754257, "grad_norm": 2.5703331850837023, "learning_rate": 6.343042071197412e-06, "loss": 0.4143, "step": 196 }, { "epoch": 0.019172749391727494, "grad_norm": 2.6940646171495133, "learning_rate": 6.375404530744337e-06, "loss": 0.5463, "step": 197 }, { "epoch": 0.01927007299270073, "grad_norm": 2.4185580273524847, "learning_rate": 6.407766990291263e-06, "loss": 0.5215, "step": 198 }, { "epoch": 0.019367396593673966, "grad_norm": 2.6509824694985946, "learning_rate": 6.440129449838188e-06, "loss": 0.5286, "step": 199 }, { "epoch": 0.019464720194647202, "grad_norm": 2.4807219128312767, "learning_rate": 6.472491909385113e-06, "loss": 0.3996, "step": 200 }, { "epoch": 0.01956204379562044, "grad_norm": 2.651883834043772, "learning_rate": 6.50485436893204e-06, "loss": 0.3499, "step": 201 }, { "epoch": 0.019659367396593675, "grad_norm": 2.670759179984812, "learning_rate": 6.537216828478965e-06, "loss": 0.552, "step": 202 }, { "epoch": 0.01975669099756691, "grad_norm": 2.51305850829245, "learning_rate": 6.56957928802589e-06, "loss": 0.3806, "step": 203 }, { "epoch": 0.019854014598540148, "grad_norm": 2.435954851305265, "learning_rate": 6.601941747572816e-06, "loss": 0.6093, "step": 204 }, { "epoch": 0.01995133819951338, "grad_norm": 2.091315833022872, "learning_rate": 6.6343042071197415e-06, "loss": 0.3573, "step": 205 }, { "epoch": 0.020048661800486617, "grad_norm": 2.205515437184344, "learning_rate": 6.666666666666667e-06, "loss": 0.2892, "step": 206 }, { "epoch": 0.020145985401459853, "grad_norm": 2.314981932930035, "learning_rate": 6.6990291262135935e-06, "loss": 0.4184, "step": 207 }, { "epoch": 0.02024330900243309, "grad_norm": 1.9102474885146974, "learning_rate": 6.731391585760519e-06, "loss": 0.2287, "step": 208 }, { "epoch": 0.020340632603406326, "grad_norm": 1.9408029275065433, "learning_rate": 6.763754045307444e-06, "loss": 0.3958, "step": 209 }, { "epoch": 0.020437956204379562, "grad_norm": 2.1006467731485823, "learning_rate": 6.79611650485437e-06, "loss": 0.3764, "step": 210 }, { "epoch": 0.0205352798053528, "grad_norm": 2.0927447282795146, "learning_rate": 6.828478964401295e-06, "loss": 0.531, "step": 211 }, { "epoch": 0.020632603406326035, "grad_norm": 3.4830081465453633, "learning_rate": 6.86084142394822e-06, "loss": 0.4887, "step": 212 }, { "epoch": 0.02072992700729927, "grad_norm": 2.253360993066953, "learning_rate": 6.893203883495147e-06, "loss": 0.4587, "step": 213 }, { "epoch": 0.020827250608272507, "grad_norm": 3.3751096354443852, "learning_rate": 6.925566343042072e-06, "loss": 0.3427, "step": 214 }, { "epoch": 0.020924574209245744, "grad_norm": 1.9729713112803993, "learning_rate": 6.957928802588997e-06, "loss": 0.384, "step": 215 }, { "epoch": 0.021021897810218976, "grad_norm": 2.761285167796522, "learning_rate": 6.990291262135923e-06, "loss": 0.3512, "step": 216 }, { "epoch": 0.021119221411192213, "grad_norm": 2.431882400442612, "learning_rate": 7.022653721682848e-06, "loss": 0.3971, "step": 217 }, { "epoch": 0.02121654501216545, "grad_norm": 3.659254877088116, "learning_rate": 7.055016181229773e-06, "loss": 0.4115, "step": 218 }, { "epoch": 0.021313868613138685, "grad_norm": 2.5501534359714655, "learning_rate": 7.0873786407767e-06, "loss": 0.4963, "step": 219 }, { "epoch": 0.02141119221411192, "grad_norm": 4.296894309260591, "learning_rate": 7.119741100323625e-06, "loss": 0.5203, "step": 220 }, { "epoch": 0.021508515815085158, "grad_norm": 2.5489854552137237, "learning_rate": 7.152103559870551e-06, "loss": 0.4343, "step": 221 }, { "epoch": 0.021605839416058394, "grad_norm": 2.00955207958064, "learning_rate": 7.184466019417476e-06, "loss": 0.3603, "step": 222 }, { "epoch": 0.02170316301703163, "grad_norm": 2.2675038932590224, "learning_rate": 7.2168284789644015e-06, "loss": 0.3968, "step": 223 }, { "epoch": 0.021800486618004867, "grad_norm": 2.4690586331753277, "learning_rate": 7.249190938511328e-06, "loss": 0.5883, "step": 224 }, { "epoch": 0.021897810218978103, "grad_norm": 2.141328682063472, "learning_rate": 7.2815533980582534e-06, "loss": 0.3547, "step": 225 }, { "epoch": 0.02199513381995134, "grad_norm": 2.223927434368622, "learning_rate": 7.3139158576051786e-06, "loss": 0.5031, "step": 226 }, { "epoch": 0.022092457420924576, "grad_norm": 2.8602320319532346, "learning_rate": 7.3462783171521046e-06, "loss": 0.4226, "step": 227 }, { "epoch": 0.02218978102189781, "grad_norm": 2.8852449405031835, "learning_rate": 7.37864077669903e-06, "loss": 0.4298, "step": 228 }, { "epoch": 0.022287104622871045, "grad_norm": 1.7370522944561966, "learning_rate": 7.411003236245955e-06, "loss": 0.3827, "step": 229 }, { "epoch": 0.02238442822384428, "grad_norm": 2.3907908463140584, "learning_rate": 7.443365695792882e-06, "loss": 0.4139, "step": 230 }, { "epoch": 0.022481751824817518, "grad_norm": 2.27581306432663, "learning_rate": 7.475728155339807e-06, "loss": 0.4736, "step": 231 }, { "epoch": 0.022579075425790754, "grad_norm": 2.1861094823645675, "learning_rate": 7.508090614886732e-06, "loss": 0.4809, "step": 232 }, { "epoch": 0.02267639902676399, "grad_norm": 1.9626208371421419, "learning_rate": 7.540453074433658e-06, "loss": 0.3436, "step": 233 }, { "epoch": 0.022773722627737226, "grad_norm": 1.7092390993202267, "learning_rate": 7.572815533980583e-06, "loss": 0.3224, "step": 234 }, { "epoch": 0.022871046228710463, "grad_norm": 3.0168693228526546, "learning_rate": 7.605177993527508e-06, "loss": 0.6366, "step": 235 }, { "epoch": 0.0229683698296837, "grad_norm": 2.424919921496664, "learning_rate": 7.637540453074434e-06, "loss": 0.4483, "step": 236 }, { "epoch": 0.023065693430656935, "grad_norm": 2.4586833984787626, "learning_rate": 7.66990291262136e-06, "loss": 0.4031, "step": 237 }, { "epoch": 0.02316301703163017, "grad_norm": 2.092010230715883, "learning_rate": 7.702265372168284e-06, "loss": 0.4257, "step": 238 }, { "epoch": 0.023260340632603408, "grad_norm": 2.3360188447701655, "learning_rate": 7.734627831715211e-06, "loss": 0.4684, "step": 239 }, { "epoch": 0.02335766423357664, "grad_norm": 2.087175894606599, "learning_rate": 7.766990291262136e-06, "loss": 0.4272, "step": 240 }, { "epoch": 0.023454987834549877, "grad_norm": 2.598684557686617, "learning_rate": 7.799352750809061e-06, "loss": 0.5401, "step": 241 }, { "epoch": 0.023552311435523113, "grad_norm": 2.025117037181364, "learning_rate": 7.831715210355988e-06, "loss": 0.372, "step": 242 }, { "epoch": 0.02364963503649635, "grad_norm": 2.2467324584398405, "learning_rate": 7.864077669902913e-06, "loss": 0.5891, "step": 243 }, { "epoch": 0.023746958637469586, "grad_norm": 2.38036373195977, "learning_rate": 7.896440129449839e-06, "loss": 0.5133, "step": 244 }, { "epoch": 0.023844282238442822, "grad_norm": 2.052700924442009, "learning_rate": 7.928802588996765e-06, "loss": 0.5161, "step": 245 }, { "epoch": 0.02394160583941606, "grad_norm": 3.4299018810240254, "learning_rate": 7.96116504854369e-06, "loss": 0.5314, "step": 246 }, { "epoch": 0.024038929440389295, "grad_norm": 1.3903956706369247, "learning_rate": 7.993527508090616e-06, "loss": 0.3539, "step": 247 }, { "epoch": 0.02413625304136253, "grad_norm": 2.4599878810180873, "learning_rate": 8.02588996763754e-06, "loss": 0.4876, "step": 248 }, { "epoch": 0.024233576642335768, "grad_norm": 2.4053308291912083, "learning_rate": 8.058252427184466e-06, "loss": 0.5185, "step": 249 }, { "epoch": 0.024330900243309004, "grad_norm": 1.6624263546342495, "learning_rate": 8.090614886731393e-06, "loss": 0.2909, "step": 250 }, { "epoch": 0.024428223844282237, "grad_norm": 2.4091367373679597, "learning_rate": 8.122977346278318e-06, "loss": 0.6192, "step": 251 }, { "epoch": 0.024525547445255473, "grad_norm": 2.4595313520548427, "learning_rate": 8.155339805825243e-06, "loss": 0.3444, "step": 252 }, { "epoch": 0.02462287104622871, "grad_norm": 2.3200411140153174, "learning_rate": 8.18770226537217e-06, "loss": 0.6112, "step": 253 }, { "epoch": 0.024720194647201946, "grad_norm": 2.029624875741936, "learning_rate": 8.220064724919095e-06, "loss": 0.4524, "step": 254 }, { "epoch": 0.024817518248175182, "grad_norm": 1.8862765408033388, "learning_rate": 8.25242718446602e-06, "loss": 0.2173, "step": 255 }, { "epoch": 0.02491484184914842, "grad_norm": 2.575687620331568, "learning_rate": 8.284789644012947e-06, "loss": 0.4599, "step": 256 }, { "epoch": 0.025012165450121655, "grad_norm": 2.373530485379713, "learning_rate": 8.317152103559872e-06, "loss": 0.5326, "step": 257 }, { "epoch": 0.02510948905109489, "grad_norm": 2.4086353319447262, "learning_rate": 8.349514563106797e-06, "loss": 0.6275, "step": 258 }, { "epoch": 0.025206812652068127, "grad_norm": 2.1075725625285697, "learning_rate": 8.381877022653722e-06, "loss": 0.44, "step": 259 }, { "epoch": 0.025304136253041364, "grad_norm": 2.0285700798989614, "learning_rate": 8.414239482200647e-06, "loss": 0.3489, "step": 260 }, { "epoch": 0.0254014598540146, "grad_norm": 2.5592973746241, "learning_rate": 8.446601941747573e-06, "loss": 0.4403, "step": 261 }, { "epoch": 0.025498783454987836, "grad_norm": 2.470930078509074, "learning_rate": 8.4789644012945e-06, "loss": 0.4985, "step": 262 }, { "epoch": 0.02559610705596107, "grad_norm": 2.099638103909556, "learning_rate": 8.511326860841424e-06, "loss": 0.4194, "step": 263 }, { "epoch": 0.025693430656934305, "grad_norm": 1.6030834140551835, "learning_rate": 8.54368932038835e-06, "loss": 0.3382, "step": 264 }, { "epoch": 0.02579075425790754, "grad_norm": 1.8960928547169034, "learning_rate": 8.576051779935276e-06, "loss": 0.2838, "step": 265 }, { "epoch": 0.025888077858880778, "grad_norm": 2.4306930963261966, "learning_rate": 8.608414239482202e-06, "loss": 0.4956, "step": 266 }, { "epoch": 0.025985401459854014, "grad_norm": 2.374430136325354, "learning_rate": 8.640776699029127e-06, "loss": 0.5083, "step": 267 }, { "epoch": 0.02608272506082725, "grad_norm": 2.410095115145934, "learning_rate": 8.673139158576054e-06, "loss": 0.4247, "step": 268 }, { "epoch": 0.026180048661800487, "grad_norm": 2.41271065696519, "learning_rate": 8.705501618122979e-06, "loss": 0.6946, "step": 269 }, { "epoch": 0.026277372262773723, "grad_norm": 1.752688930628829, "learning_rate": 8.737864077669904e-06, "loss": 0.2662, "step": 270 }, { "epoch": 0.02637469586374696, "grad_norm": 1.9842034213162434, "learning_rate": 8.770226537216829e-06, "loss": 0.3611, "step": 271 }, { "epoch": 0.026472019464720196, "grad_norm": 2.4137979998327497, "learning_rate": 8.802588996763754e-06, "loss": 0.501, "step": 272 }, { "epoch": 0.026569343065693432, "grad_norm": 2.929650064864996, "learning_rate": 8.834951456310681e-06, "loss": 0.6153, "step": 273 }, { "epoch": 0.02666666666666667, "grad_norm": 2.281738020025263, "learning_rate": 8.867313915857606e-06, "loss": 0.5395, "step": 274 }, { "epoch": 0.0267639902676399, "grad_norm": 2.1406726692627975, "learning_rate": 8.899676375404531e-06, "loss": 0.4039, "step": 275 }, { "epoch": 0.026861313868613138, "grad_norm": 3.2366954201371523, "learning_rate": 8.932038834951458e-06, "loss": 0.5414, "step": 276 }, { "epoch": 0.026958637469586374, "grad_norm": 2.1900667662872513, "learning_rate": 8.964401294498383e-06, "loss": 0.3815, "step": 277 }, { "epoch": 0.02705596107055961, "grad_norm": 2.5301939091612216, "learning_rate": 8.996763754045308e-06, "loss": 0.8016, "step": 278 }, { "epoch": 0.027153284671532846, "grad_norm": 2.2552758985680907, "learning_rate": 9.029126213592233e-06, "loss": 0.4133, "step": 279 }, { "epoch": 0.027250608272506083, "grad_norm": 2.309545536997134, "learning_rate": 9.06148867313916e-06, "loss": 0.5346, "step": 280 }, { "epoch": 0.02734793187347932, "grad_norm": 2.585578916644781, "learning_rate": 9.093851132686085e-06, "loss": 0.407, "step": 281 }, { "epoch": 0.027445255474452555, "grad_norm": 1.8503464194025006, "learning_rate": 9.12621359223301e-06, "loss": 0.4674, "step": 282 }, { "epoch": 0.02754257907542579, "grad_norm": 2.431490115980846, "learning_rate": 9.158576051779936e-06, "loss": 0.6026, "step": 283 }, { "epoch": 0.027639902676399028, "grad_norm": 1.916233248702735, "learning_rate": 9.19093851132686e-06, "loss": 0.4949, "step": 284 }, { "epoch": 0.027737226277372264, "grad_norm": 2.2160236640245072, "learning_rate": 9.223300970873788e-06, "loss": 0.4765, "step": 285 }, { "epoch": 0.027834549878345497, "grad_norm": 2.0764827118780143, "learning_rate": 9.255663430420713e-06, "loss": 0.472, "step": 286 }, { "epoch": 0.027931873479318733, "grad_norm": 2.638286661284288, "learning_rate": 9.288025889967638e-06, "loss": 0.6312, "step": 287 }, { "epoch": 0.02802919708029197, "grad_norm": 1.940011273577467, "learning_rate": 9.320388349514565e-06, "loss": 0.4555, "step": 288 }, { "epoch": 0.028126520681265206, "grad_norm": 1.8760624736314784, "learning_rate": 9.35275080906149e-06, "loss": 0.3625, "step": 289 }, { "epoch": 0.028223844282238442, "grad_norm": 1.3468692859077058, "learning_rate": 9.385113268608415e-06, "loss": 0.2442, "step": 290 }, { "epoch": 0.02832116788321168, "grad_norm": 2.1497394847504014, "learning_rate": 9.41747572815534e-06, "loss": 0.5227, "step": 291 }, { "epoch": 0.028418491484184915, "grad_norm": 2.1233743171190014, "learning_rate": 9.449838187702267e-06, "loss": 0.6184, "step": 292 }, { "epoch": 0.02851581508515815, "grad_norm": 2.337806183860394, "learning_rate": 9.482200647249192e-06, "loss": 0.5491, "step": 293 }, { "epoch": 0.028613138686131388, "grad_norm": 2.015000594070385, "learning_rate": 9.514563106796117e-06, "loss": 0.5137, "step": 294 }, { "epoch": 0.028710462287104624, "grad_norm": 2.0267324830753766, "learning_rate": 9.546925566343042e-06, "loss": 0.4117, "step": 295 }, { "epoch": 0.02880778588807786, "grad_norm": 1.732639028192012, "learning_rate": 9.579288025889967e-06, "loss": 0.3156, "step": 296 }, { "epoch": 0.028905109489051097, "grad_norm": 2.1204056159243923, "learning_rate": 9.611650485436894e-06, "loss": 0.6056, "step": 297 }, { "epoch": 0.02900243309002433, "grad_norm": 1.7868071753968195, "learning_rate": 9.64401294498382e-06, "loss": 0.3417, "step": 298 }, { "epoch": 0.029099756690997566, "grad_norm": 1.9477439300595292, "learning_rate": 9.676375404530746e-06, "loss": 0.3631, "step": 299 }, { "epoch": 0.029197080291970802, "grad_norm": 1.7688147839655162, "learning_rate": 9.708737864077671e-06, "loss": 0.3605, "step": 300 }, { "epoch": 0.029294403892944038, "grad_norm": 1.9162335597538034, "learning_rate": 9.741100323624596e-06, "loss": 0.2498, "step": 301 }, { "epoch": 0.029391727493917275, "grad_norm": 2.9282579520055756, "learning_rate": 9.773462783171522e-06, "loss": 0.4286, "step": 302 }, { "epoch": 0.02948905109489051, "grad_norm": 1.9744499285549086, "learning_rate": 9.805825242718447e-06, "loss": 0.3391, "step": 303 }, { "epoch": 0.029586374695863747, "grad_norm": 2.2116032868392455, "learning_rate": 9.838187702265373e-06, "loss": 0.3414, "step": 304 }, { "epoch": 0.029683698296836983, "grad_norm": 1.9159144570242486, "learning_rate": 9.870550161812299e-06, "loss": 0.2915, "step": 305 }, { "epoch": 0.02978102189781022, "grad_norm": 2.671718838238437, "learning_rate": 9.902912621359224e-06, "loss": 0.79, "step": 306 }, { "epoch": 0.029878345498783456, "grad_norm": 2.093937424199301, "learning_rate": 9.935275080906149e-06, "loss": 0.576, "step": 307 }, { "epoch": 0.029975669099756692, "grad_norm": 1.895574286512308, "learning_rate": 9.967637540453076e-06, "loss": 0.4223, "step": 308 }, { "epoch": 0.03007299270072993, "grad_norm": 2.142643554578675, "learning_rate": 1e-05, "loss": 0.4719, "step": 309 }, { "epoch": 0.03017031630170316, "grad_norm": 2.2548613483238378, "learning_rate": 9.999999751573464e-06, "loss": 0.5547, "step": 310 }, { "epoch": 0.030267639902676398, "grad_norm": 2.375146158639999, "learning_rate": 9.99999900629388e-06, "loss": 0.3864, "step": 311 }, { "epoch": 0.030364963503649634, "grad_norm": 1.558937895217452, "learning_rate": 9.99999776416132e-06, "loss": 0.3409, "step": 312 }, { "epoch": 0.03046228710462287, "grad_norm": 2.7508940543848115, "learning_rate": 9.99999602517591e-06, "loss": 0.3641, "step": 313 }, { "epoch": 0.030559610705596107, "grad_norm": 2.228096737889712, "learning_rate": 9.99999378933782e-06, "loss": 0.6464, "step": 314 }, { "epoch": 0.030656934306569343, "grad_norm": 1.5612763763472, "learning_rate": 9.999991056647274e-06, "loss": 0.3124, "step": 315 }, { "epoch": 0.03075425790754258, "grad_norm": 2.3203527787434104, "learning_rate": 9.999987827104544e-06, "loss": 0.5893, "step": 316 }, { "epoch": 0.030851581508515816, "grad_norm": 1.8472611567410342, "learning_rate": 9.999984100709951e-06, "loss": 0.3732, "step": 317 }, { "epoch": 0.030948905109489052, "grad_norm": 2.269778108549014, "learning_rate": 9.999979877463866e-06, "loss": 0.5537, "step": 318 }, { "epoch": 0.03104622871046229, "grad_norm": 2.381498581134022, "learning_rate": 9.999975157366705e-06, "loss": 0.7179, "step": 319 }, { "epoch": 0.031143552311435525, "grad_norm": 1.7030655036823346, "learning_rate": 9.99996994041894e-06, "loss": 0.4256, "step": 320 }, { "epoch": 0.031240875912408757, "grad_norm": 1.8361141038730153, "learning_rate": 9.999964226621089e-06, "loss": 0.4648, "step": 321 }, { "epoch": 0.031338199513381994, "grad_norm": 1.7985459229558753, "learning_rate": 9.99995801597372e-06, "loss": 0.3031, "step": 322 }, { "epoch": 0.031435523114355234, "grad_norm": 2.4309020119442915, "learning_rate": 9.99995130847745e-06, "loss": 0.5011, "step": 323 }, { "epoch": 0.031532846715328466, "grad_norm": 2.048514022969095, "learning_rate": 9.999944104132944e-06, "loss": 0.6152, "step": 324 }, { "epoch": 0.031630170316301706, "grad_norm": 1.8892667320795724, "learning_rate": 9.99993640294092e-06, "loss": 0.4738, "step": 325 }, { "epoch": 0.03172749391727494, "grad_norm": 2.081179331819785, "learning_rate": 9.999928204902141e-06, "loss": 0.5192, "step": 326 }, { "epoch": 0.03182481751824817, "grad_norm": 2.410280889073595, "learning_rate": 9.999919510017424e-06, "loss": 0.3314, "step": 327 }, { "epoch": 0.03192214111922141, "grad_norm": 1.663034255724975, "learning_rate": 9.999910318287632e-06, "loss": 0.3342, "step": 328 }, { "epoch": 0.032019464720194644, "grad_norm": 1.7874391345352068, "learning_rate": 9.999900629713679e-06, "loss": 0.3189, "step": 329 }, { "epoch": 0.032116788321167884, "grad_norm": 2.1098429973097805, "learning_rate": 9.999890444296528e-06, "loss": 0.4561, "step": 330 }, { "epoch": 0.03221411192214112, "grad_norm": 2.4678279265353558, "learning_rate": 9.999879762037187e-06, "loss": 0.5831, "step": 331 }, { "epoch": 0.03231143552311436, "grad_norm": 1.6643716587630457, "learning_rate": 9.999868582936726e-06, "loss": 0.4371, "step": 332 }, { "epoch": 0.03240875912408759, "grad_norm": 2.088466639768523, "learning_rate": 9.999856906996246e-06, "loss": 0.3904, "step": 333 }, { "epoch": 0.03250608272506083, "grad_norm": 2.0023651443392256, "learning_rate": 9.999844734216914e-06, "loss": 0.4802, "step": 334 }, { "epoch": 0.03260340632603406, "grad_norm": 2.161282844007076, "learning_rate": 9.99983206459994e-06, "loss": 0.5187, "step": 335 }, { "epoch": 0.0327007299270073, "grad_norm": 2.10212671583593, "learning_rate": 9.999818898146576e-06, "loss": 0.4618, "step": 336 }, { "epoch": 0.032798053527980535, "grad_norm": 2.2142899508809286, "learning_rate": 9.999805234858137e-06, "loss": 0.2387, "step": 337 }, { "epoch": 0.032895377128953775, "grad_norm": 2.1084763484693023, "learning_rate": 9.999791074735981e-06, "loss": 0.5652, "step": 338 }, { "epoch": 0.03299270072992701, "grad_norm": 2.261838498017328, "learning_rate": 9.99977641778151e-06, "loss": 0.7224, "step": 339 }, { "epoch": 0.03309002433090024, "grad_norm": 1.612816030006559, "learning_rate": 9.999761263996184e-06, "loss": 0.377, "step": 340 }, { "epoch": 0.03318734793187348, "grad_norm": 2.1209830295615832, "learning_rate": 9.999745613381507e-06, "loss": 0.614, "step": 341 }, { "epoch": 0.03328467153284671, "grad_norm": 1.7938764015879674, "learning_rate": 9.999729465939036e-06, "loss": 0.3983, "step": 342 }, { "epoch": 0.03338199513381995, "grad_norm": 1.943418875698731, "learning_rate": 9.999712821670375e-06, "loss": 0.4708, "step": 343 }, { "epoch": 0.033479318734793186, "grad_norm": 1.9787546900237571, "learning_rate": 9.99969568057718e-06, "loss": 0.578, "step": 344 }, { "epoch": 0.033576642335766425, "grad_norm": 1.4798263726328331, "learning_rate": 9.99967804266115e-06, "loss": 0.394, "step": 345 }, { "epoch": 0.03367396593673966, "grad_norm": 2.1936298043995484, "learning_rate": 9.99965990792404e-06, "loss": 0.6316, "step": 346 }, { "epoch": 0.0337712895377129, "grad_norm": 2.2799650780195133, "learning_rate": 9.99964127636765e-06, "loss": 0.3985, "step": 347 }, { "epoch": 0.03386861313868613, "grad_norm": 1.8519049219191819, "learning_rate": 9.999622147993837e-06, "loss": 0.3853, "step": 348 }, { "epoch": 0.03396593673965937, "grad_norm": 1.5111895282974241, "learning_rate": 9.999602522804497e-06, "loss": 0.4201, "step": 349 }, { "epoch": 0.0340632603406326, "grad_norm": 1.8605769784283237, "learning_rate": 9.99958240080158e-06, "loss": 0.5225, "step": 350 }, { "epoch": 0.034160583941605836, "grad_norm": 1.6063240538866903, "learning_rate": 9.999561781987087e-06, "loss": 0.3165, "step": 351 }, { "epoch": 0.034257907542579076, "grad_norm": 1.4751976204077173, "learning_rate": 9.999540666363068e-06, "loss": 0.3156, "step": 352 }, { "epoch": 0.03435523114355231, "grad_norm": 2.1029966771511757, "learning_rate": 9.99951905393162e-06, "loss": 0.5336, "step": 353 }, { "epoch": 0.03445255474452555, "grad_norm": 2.1419054642874267, "learning_rate": 9.99949694469489e-06, "loss": 0.5253, "step": 354 }, { "epoch": 0.03454987834549878, "grad_norm": 2.169397271826959, "learning_rate": 9.999474338655075e-06, "loss": 0.5567, "step": 355 }, { "epoch": 0.03464720194647202, "grad_norm": 2.2972412855327797, "learning_rate": 9.999451235814422e-06, "loss": 0.5233, "step": 356 }, { "epoch": 0.034744525547445254, "grad_norm": 1.830377999961128, "learning_rate": 9.999427636175228e-06, "loss": 0.4297, "step": 357 }, { "epoch": 0.034841849148418494, "grad_norm": 2.1217123292302875, "learning_rate": 9.999403539739837e-06, "loss": 0.3605, "step": 358 }, { "epoch": 0.03493917274939173, "grad_norm": 2.001599625802253, "learning_rate": 9.999378946510642e-06, "loss": 0.5237, "step": 359 }, { "epoch": 0.035036496350364967, "grad_norm": 1.6719956399048532, "learning_rate": 9.99935385649009e-06, "loss": 0.424, "step": 360 }, { "epoch": 0.0351338199513382, "grad_norm": 1.5962062682133515, "learning_rate": 9.99932826968067e-06, "loss": 0.4228, "step": 361 }, { "epoch": 0.03523114355231143, "grad_norm": 1.9754274750693919, "learning_rate": 9.999302186084929e-06, "loss": 0.4333, "step": 362 }, { "epoch": 0.03532846715328467, "grad_norm": 1.8248617929879183, "learning_rate": 9.999275605705457e-06, "loss": 0.4985, "step": 363 }, { "epoch": 0.035425790754257905, "grad_norm": 2.5923075514224982, "learning_rate": 9.999248528544895e-06, "loss": 0.4829, "step": 364 }, { "epoch": 0.035523114355231145, "grad_norm": 1.9900801938638135, "learning_rate": 9.999220954605932e-06, "loss": 0.587, "step": 365 }, { "epoch": 0.03562043795620438, "grad_norm": 1.731558772897005, "learning_rate": 9.999192883891314e-06, "loss": 0.3299, "step": 366 }, { "epoch": 0.03571776155717762, "grad_norm": 2.339577788711278, "learning_rate": 9.999164316403823e-06, "loss": 0.4845, "step": 367 }, { "epoch": 0.03581508515815085, "grad_norm": 1.9784113864985955, "learning_rate": 9.999135252146302e-06, "loss": 0.5776, "step": 368 }, { "epoch": 0.03591240875912409, "grad_norm": 1.5555461256937277, "learning_rate": 9.999105691121638e-06, "loss": 0.3563, "step": 369 }, { "epoch": 0.03600973236009732, "grad_norm": 1.7905677559908044, "learning_rate": 9.99907563333277e-06, "loss": 0.546, "step": 370 }, { "epoch": 0.03610705596107056, "grad_norm": 2.0490894714600287, "learning_rate": 9.999045078782684e-06, "loss": 0.6836, "step": 371 }, { "epoch": 0.036204379562043795, "grad_norm": 2.216601446334751, "learning_rate": 9.999014027474413e-06, "loss": 0.5237, "step": 372 }, { "epoch": 0.036301703163017035, "grad_norm": 1.5937926342815392, "learning_rate": 9.998982479411047e-06, "loss": 0.3539, "step": 373 }, { "epoch": 0.03639902676399027, "grad_norm": 2.3941848280266864, "learning_rate": 9.99895043459572e-06, "loss": 0.6249, "step": 374 }, { "epoch": 0.0364963503649635, "grad_norm": 2.072859669066288, "learning_rate": 9.998917893031615e-06, "loss": 0.5415, "step": 375 }, { "epoch": 0.03659367396593674, "grad_norm": 1.670908711065728, "learning_rate": 9.998884854721968e-06, "loss": 0.3034, "step": 376 }, { "epoch": 0.03669099756690997, "grad_norm": 1.9880303784818283, "learning_rate": 9.998851319670057e-06, "loss": 0.5025, "step": 377 }, { "epoch": 0.03678832116788321, "grad_norm": 1.3517666701087396, "learning_rate": 9.99881728787922e-06, "loss": 0.2775, "step": 378 }, { "epoch": 0.036885644768856446, "grad_norm": 1.8952553535268069, "learning_rate": 9.998782759352839e-06, "loss": 0.5306, "step": 379 }, { "epoch": 0.036982968369829686, "grad_norm": 1.8730537486024816, "learning_rate": 9.998747734094338e-06, "loss": 0.386, "step": 380 }, { "epoch": 0.03708029197080292, "grad_norm": 2.058996056292158, "learning_rate": 9.998712212107205e-06, "loss": 0.5641, "step": 381 }, { "epoch": 0.03717761557177616, "grad_norm": 1.9837834234853275, "learning_rate": 9.998676193394966e-06, "loss": 0.2628, "step": 382 }, { "epoch": 0.03727493917274939, "grad_norm": 2.189700953999047, "learning_rate": 9.998639677961203e-06, "loss": 0.6024, "step": 383 }, { "epoch": 0.03737226277372263, "grad_norm": 2.060696593716547, "learning_rate": 9.99860266580954e-06, "loss": 0.5377, "step": 384 }, { "epoch": 0.037469586374695864, "grad_norm": 2.0831966609629227, "learning_rate": 9.99856515694366e-06, "loss": 0.5063, "step": 385 }, { "epoch": 0.0375669099756691, "grad_norm": 2.2950496556846227, "learning_rate": 9.998527151367288e-06, "loss": 0.6484, "step": 386 }, { "epoch": 0.037664233576642336, "grad_norm": 2.2597922123273873, "learning_rate": 9.9984886490842e-06, "loss": 0.6617, "step": 387 }, { "epoch": 0.03776155717761557, "grad_norm": 2.071575887731456, "learning_rate": 9.99844965009822e-06, "loss": 0.5405, "step": 388 }, { "epoch": 0.03785888077858881, "grad_norm": 2.004249587957457, "learning_rate": 9.99841015441323e-06, "loss": 0.4306, "step": 389 }, { "epoch": 0.03795620437956204, "grad_norm": 1.9297023880727862, "learning_rate": 9.99837016203315e-06, "loss": 0.4083, "step": 390 }, { "epoch": 0.03805352798053528, "grad_norm": 2.001337081282171, "learning_rate": 9.998329672961952e-06, "loss": 0.4999, "step": 391 }, { "epoch": 0.038150851581508514, "grad_norm": 1.7630230797021285, "learning_rate": 9.998288687203665e-06, "loss": 0.4267, "step": 392 }, { "epoch": 0.038248175182481754, "grad_norm": 1.4413546421147376, "learning_rate": 9.998247204762358e-06, "loss": 0.3028, "step": 393 }, { "epoch": 0.03834549878345499, "grad_norm": 2.032450629241147, "learning_rate": 9.998205225642154e-06, "loss": 0.4216, "step": 394 }, { "epoch": 0.03844282238442823, "grad_norm": 1.8288270303352272, "learning_rate": 9.998162749847224e-06, "loss": 0.451, "step": 395 }, { "epoch": 0.03854014598540146, "grad_norm": 1.5869427581540143, "learning_rate": 9.998119777381791e-06, "loss": 0.4896, "step": 396 }, { "epoch": 0.03863746958637469, "grad_norm": 1.9312614168983935, "learning_rate": 9.998076308250122e-06, "loss": 0.351, "step": 397 }, { "epoch": 0.03873479318734793, "grad_norm": 2.182734939846557, "learning_rate": 9.99803234245654e-06, "loss": 0.4456, "step": 398 }, { "epoch": 0.038832116788321165, "grad_norm": 1.6075130172605856, "learning_rate": 9.997987880005412e-06, "loss": 0.3333, "step": 399 }, { "epoch": 0.038929440389294405, "grad_norm": 2.0206579020801048, "learning_rate": 9.997942920901154e-06, "loss": 0.4662, "step": 400 }, { "epoch": 0.03902676399026764, "grad_norm": 2.0019154912621246, "learning_rate": 9.997897465148236e-06, "loss": 0.588, "step": 401 }, { "epoch": 0.03912408759124088, "grad_norm": 1.9556688755730123, "learning_rate": 9.997851512751178e-06, "loss": 0.5364, "step": 402 }, { "epoch": 0.03922141119221411, "grad_norm": 2.1735940620422687, "learning_rate": 9.997805063714541e-06, "loss": 0.4155, "step": 403 }, { "epoch": 0.03931873479318735, "grad_norm": 1.893104755523836, "learning_rate": 9.997758118042945e-06, "loss": 0.2835, "step": 404 }, { "epoch": 0.03941605839416058, "grad_norm": 1.892857392200546, "learning_rate": 9.99771067574105e-06, "loss": 0.317, "step": 405 }, { "epoch": 0.03951338199513382, "grad_norm": 2.194365925195629, "learning_rate": 9.997662736813575e-06, "loss": 0.5972, "step": 406 }, { "epoch": 0.039610705596107056, "grad_norm": 2.3359516870584547, "learning_rate": 9.997614301265281e-06, "loss": 0.3505, "step": 407 }, { "epoch": 0.039708029197080295, "grad_norm": 1.8041349283411827, "learning_rate": 9.997565369100983e-06, "loss": 0.4003, "step": 408 }, { "epoch": 0.03980535279805353, "grad_norm": 2.2199870140108273, "learning_rate": 9.997515940325542e-06, "loss": 0.4428, "step": 409 }, { "epoch": 0.03990267639902676, "grad_norm": 2.193796849633566, "learning_rate": 9.997466014943871e-06, "loss": 0.3906, "step": 410 }, { "epoch": 0.04, "grad_norm": 2.7309920828616168, "learning_rate": 9.99741559296093e-06, "loss": 0.6283, "step": 411 }, { "epoch": 0.040097323600973234, "grad_norm": 2.220745639846989, "learning_rate": 9.99736467438173e-06, "loss": 0.4568, "step": 412 }, { "epoch": 0.04019464720194647, "grad_norm": 1.905067765139487, "learning_rate": 9.99731325921133e-06, "loss": 0.3198, "step": 413 }, { "epoch": 0.040291970802919706, "grad_norm": 2.0461180940034116, "learning_rate": 9.997261347454841e-06, "loss": 0.3783, "step": 414 }, { "epoch": 0.040389294403892946, "grad_norm": 1.9732614929529544, "learning_rate": 9.99720893911742e-06, "loss": 0.5211, "step": 415 }, { "epoch": 0.04048661800486618, "grad_norm": 2.341156401873798, "learning_rate": 9.997156034204276e-06, "loss": 0.5094, "step": 416 }, { "epoch": 0.04058394160583942, "grad_norm": 2.2588135503158138, "learning_rate": 9.997102632720664e-06, "loss": 0.591, "step": 417 }, { "epoch": 0.04068126520681265, "grad_norm": 2.187795564574772, "learning_rate": 9.997048734671893e-06, "loss": 0.3811, "step": 418 }, { "epoch": 0.04077858880778589, "grad_norm": 2.2570398189900938, "learning_rate": 9.996994340063314e-06, "loss": 0.4494, "step": 419 }, { "epoch": 0.040875912408759124, "grad_norm": 2.3267878846596597, "learning_rate": 9.996939448900341e-06, "loss": 0.5254, "step": 420 }, { "epoch": 0.04097323600973236, "grad_norm": 1.9149387144635641, "learning_rate": 9.99688406118842e-06, "loss": 0.4281, "step": 421 }, { "epoch": 0.0410705596107056, "grad_norm": 2.4052095021382285, "learning_rate": 9.996828176933062e-06, "loss": 0.61, "step": 422 }, { "epoch": 0.04116788321167883, "grad_norm": 2.8744864627123237, "learning_rate": 9.996771796139814e-06, "loss": 0.4708, "step": 423 }, { "epoch": 0.04126520681265207, "grad_norm": 2.0334953222734513, "learning_rate": 9.996714918814284e-06, "loss": 0.2697, "step": 424 }, { "epoch": 0.0413625304136253, "grad_norm": 2.1314093477075486, "learning_rate": 9.996657544962119e-06, "loss": 0.3026, "step": 425 }, { "epoch": 0.04145985401459854, "grad_norm": 1.7241742631767316, "learning_rate": 9.996599674589022e-06, "loss": 0.3624, "step": 426 }, { "epoch": 0.041557177615571775, "grad_norm": 2.417754377928955, "learning_rate": 9.996541307700746e-06, "loss": 0.6682, "step": 427 }, { "epoch": 0.041654501216545015, "grad_norm": 2.2126055245100256, "learning_rate": 9.99648244430309e-06, "loss": 0.3705, "step": 428 }, { "epoch": 0.04175182481751825, "grad_norm": 1.8224510106748588, "learning_rate": 9.996423084401901e-06, "loss": 0.4318, "step": 429 }, { "epoch": 0.04184914841849149, "grad_norm": 1.6786428352287364, "learning_rate": 9.996363228003079e-06, "loss": 0.4662, "step": 430 }, { "epoch": 0.04194647201946472, "grad_norm": 1.9342922605897592, "learning_rate": 9.99630287511257e-06, "loss": 0.4874, "step": 431 }, { "epoch": 0.04204379562043795, "grad_norm": 1.9444011100602645, "learning_rate": 9.996242025736377e-06, "loss": 0.3711, "step": 432 }, { "epoch": 0.04214111922141119, "grad_norm": 3.114184163688958, "learning_rate": 9.99618067988054e-06, "loss": 0.5342, "step": 433 }, { "epoch": 0.042238442822384425, "grad_norm": 1.993932460938173, "learning_rate": 9.99611883755116e-06, "loss": 0.465, "step": 434 }, { "epoch": 0.042335766423357665, "grad_norm": 1.5062408953506277, "learning_rate": 9.99605649875438e-06, "loss": 0.3862, "step": 435 }, { "epoch": 0.0424330900243309, "grad_norm": 2.5287447175721733, "learning_rate": 9.995993663496394e-06, "loss": 0.5638, "step": 436 }, { "epoch": 0.04253041362530414, "grad_norm": 1.7215400937807486, "learning_rate": 9.995930331783448e-06, "loss": 0.3507, "step": 437 }, { "epoch": 0.04262773722627737, "grad_norm": 1.5105936757865817, "learning_rate": 9.995866503621834e-06, "loss": 0.4086, "step": 438 }, { "epoch": 0.04272506082725061, "grad_norm": 1.828501540310894, "learning_rate": 9.995802179017893e-06, "loss": 0.3477, "step": 439 }, { "epoch": 0.04282238442822384, "grad_norm": 1.6658361590948114, "learning_rate": 9.995737357978022e-06, "loss": 0.4006, "step": 440 }, { "epoch": 0.04291970802919708, "grad_norm": 1.6434395036324305, "learning_rate": 9.995672040508656e-06, "loss": 0.4349, "step": 441 }, { "epoch": 0.043017031630170316, "grad_norm": 1.9913424027071125, "learning_rate": 9.99560622661629e-06, "loss": 0.3415, "step": 442 }, { "epoch": 0.043114355231143556, "grad_norm": 1.6487474195389296, "learning_rate": 9.995539916307463e-06, "loss": 0.4804, "step": 443 }, { "epoch": 0.04321167883211679, "grad_norm": 1.4861266391850032, "learning_rate": 9.995473109588764e-06, "loss": 0.411, "step": 444 }, { "epoch": 0.04330900243309002, "grad_norm": 1.4390762643228305, "learning_rate": 9.995405806466831e-06, "loss": 0.3806, "step": 445 }, { "epoch": 0.04340632603406326, "grad_norm": 1.7775332171720517, "learning_rate": 9.995338006948353e-06, "loss": 0.3332, "step": 446 }, { "epoch": 0.043503649635036494, "grad_norm": 1.7312883283317864, "learning_rate": 9.995269711040067e-06, "loss": 0.2736, "step": 447 }, { "epoch": 0.043600973236009734, "grad_norm": 1.7973901424872405, "learning_rate": 9.995200918748759e-06, "loss": 0.5597, "step": 448 }, { "epoch": 0.04369829683698297, "grad_norm": 2.0409413301370334, "learning_rate": 9.995131630081265e-06, "loss": 0.6045, "step": 449 }, { "epoch": 0.043795620437956206, "grad_norm": 3.2708903670147347, "learning_rate": 9.995061845044473e-06, "loss": 0.6245, "step": 450 }, { "epoch": 0.04389294403892944, "grad_norm": 1.744466889932859, "learning_rate": 9.994991563645314e-06, "loss": 0.4129, "step": 451 }, { "epoch": 0.04399026763990268, "grad_norm": 1.8775864246251477, "learning_rate": 9.994920785890771e-06, "loss": 0.414, "step": 452 }, { "epoch": 0.04408759124087591, "grad_norm": 1.3868286948878126, "learning_rate": 9.994849511787881e-06, "loss": 0.3164, "step": 453 }, { "epoch": 0.04418491484184915, "grad_norm": 1.6888257223301795, "learning_rate": 9.994777741343727e-06, "loss": 0.3241, "step": 454 }, { "epoch": 0.044282238442822384, "grad_norm": 1.5029594314338663, "learning_rate": 9.994705474565436e-06, "loss": 0.4148, "step": 455 }, { "epoch": 0.04437956204379562, "grad_norm": 1.7159996915963702, "learning_rate": 9.994632711460193e-06, "loss": 0.3387, "step": 456 }, { "epoch": 0.04447688564476886, "grad_norm": 1.7717997513120352, "learning_rate": 9.994559452035228e-06, "loss": 0.4547, "step": 457 }, { "epoch": 0.04457420924574209, "grad_norm": 1.887765282184233, "learning_rate": 9.99448569629782e-06, "loss": 0.5919, "step": 458 }, { "epoch": 0.04467153284671533, "grad_norm": 2.0151049512314585, "learning_rate": 9.994411444255298e-06, "loss": 0.4556, "step": 459 }, { "epoch": 0.04476885644768856, "grad_norm": 1.5706463359289826, "learning_rate": 9.994336695915041e-06, "loss": 0.3443, "step": 460 }, { "epoch": 0.0448661800486618, "grad_norm": 1.9067884841542395, "learning_rate": 9.994261451284477e-06, "loss": 0.5862, "step": 461 }, { "epoch": 0.044963503649635035, "grad_norm": 1.7346846845298518, "learning_rate": 9.994185710371083e-06, "loss": 0.3588, "step": 462 }, { "epoch": 0.045060827250608275, "grad_norm": 1.5593715629463312, "learning_rate": 9.994109473182385e-06, "loss": 0.2891, "step": 463 }, { "epoch": 0.04515815085158151, "grad_norm": 2.326736753149576, "learning_rate": 9.994032739725959e-06, "loss": 0.6517, "step": 464 }, { "epoch": 0.04525547445255475, "grad_norm": 2.2142852132770305, "learning_rate": 9.99395551000943e-06, "loss": 0.3571, "step": 465 }, { "epoch": 0.04535279805352798, "grad_norm": 1.7351954813390544, "learning_rate": 9.993877784040474e-06, "loss": 0.3849, "step": 466 }, { "epoch": 0.04545012165450121, "grad_norm": 1.3962336815381617, "learning_rate": 9.993799561826811e-06, "loss": 0.311, "step": 467 }, { "epoch": 0.04554744525547445, "grad_norm": 1.878958465421645, "learning_rate": 9.993720843376216e-06, "loss": 0.5602, "step": 468 }, { "epoch": 0.045644768856447686, "grad_norm": 1.519160992933857, "learning_rate": 9.993641628696513e-06, "loss": 0.2379, "step": 469 }, { "epoch": 0.045742092457420926, "grad_norm": 2.5345930464298885, "learning_rate": 9.99356191779557e-06, "loss": 0.4239, "step": 470 }, { "epoch": 0.04583941605839416, "grad_norm": 1.3153911718041251, "learning_rate": 9.993481710681314e-06, "loss": 0.3454, "step": 471 }, { "epoch": 0.0459367396593674, "grad_norm": 2.16208125563947, "learning_rate": 9.993401007361707e-06, "loss": 0.5386, "step": 472 }, { "epoch": 0.04603406326034063, "grad_norm": 1.8150842593472827, "learning_rate": 9.993319807844775e-06, "loss": 0.3077, "step": 473 }, { "epoch": 0.04613138686131387, "grad_norm": 1.6656864462678063, "learning_rate": 9.993238112138584e-06, "loss": 0.4927, "step": 474 }, { "epoch": 0.046228710462287104, "grad_norm": 1.3429917702468868, "learning_rate": 9.993155920251252e-06, "loss": 0.2433, "step": 475 }, { "epoch": 0.04632603406326034, "grad_norm": 1.3651155739367906, "learning_rate": 9.993073232190949e-06, "loss": 0.2947, "step": 476 }, { "epoch": 0.046423357664233576, "grad_norm": 1.7815516701613203, "learning_rate": 9.992990047965887e-06, "loss": 0.5372, "step": 477 }, { "epoch": 0.046520681265206816, "grad_norm": 1.846696342179327, "learning_rate": 9.992906367584337e-06, "loss": 0.5127, "step": 478 }, { "epoch": 0.04661800486618005, "grad_norm": 1.7511253825578088, "learning_rate": 9.992822191054612e-06, "loss": 0.4074, "step": 479 }, { "epoch": 0.04671532846715328, "grad_norm": 1.8105635986872588, "learning_rate": 9.992737518385076e-06, "loss": 0.4998, "step": 480 }, { "epoch": 0.04681265206812652, "grad_norm": 2.2743597617900746, "learning_rate": 9.992652349584147e-06, "loss": 0.6249, "step": 481 }, { "epoch": 0.046909975669099754, "grad_norm": 1.93948496382319, "learning_rate": 9.992566684660282e-06, "loss": 0.5411, "step": 482 }, { "epoch": 0.047007299270072994, "grad_norm": 1.4073760716303516, "learning_rate": 9.992480523621999e-06, "loss": 0.3506, "step": 483 }, { "epoch": 0.04710462287104623, "grad_norm": 1.388293079160528, "learning_rate": 9.992393866477856e-06, "loss": 0.3304, "step": 484 }, { "epoch": 0.04720194647201947, "grad_norm": 2.082643572745618, "learning_rate": 9.992306713236467e-06, "loss": 0.5653, "step": 485 }, { "epoch": 0.0472992700729927, "grad_norm": 1.7104664332606834, "learning_rate": 9.992219063906492e-06, "loss": 0.3317, "step": 486 }, { "epoch": 0.04739659367396594, "grad_norm": 1.7575848919482624, "learning_rate": 9.992130918496638e-06, "loss": 0.4109, "step": 487 }, { "epoch": 0.04749391727493917, "grad_norm": 1.7351379091271637, "learning_rate": 9.992042277015668e-06, "loss": 0.5065, "step": 488 }, { "epoch": 0.04759124087591241, "grad_norm": 1.4444570948381004, "learning_rate": 9.991953139472387e-06, "loss": 0.4023, "step": 489 }, { "epoch": 0.047688564476885645, "grad_norm": 1.4697709289140384, "learning_rate": 9.991863505875656e-06, "loss": 0.3364, "step": 490 }, { "epoch": 0.04778588807785888, "grad_norm": 1.9428205960506804, "learning_rate": 9.99177337623438e-06, "loss": 0.4303, "step": 491 }, { "epoch": 0.04788321167883212, "grad_norm": 1.931152158561148, "learning_rate": 9.991682750557516e-06, "loss": 0.2857, "step": 492 }, { "epoch": 0.04798053527980535, "grad_norm": 1.9301394655308035, "learning_rate": 9.991591628854067e-06, "loss": 0.5998, "step": 493 }, { "epoch": 0.04807785888077859, "grad_norm": 1.7788293016868693, "learning_rate": 9.99150001113309e-06, "loss": 0.4595, "step": 494 }, { "epoch": 0.04817518248175182, "grad_norm": 2.0641225732440134, "learning_rate": 9.99140789740369e-06, "loss": 0.3848, "step": 495 }, { "epoch": 0.04827250608272506, "grad_norm": 2.2832955373527044, "learning_rate": 9.99131528767502e-06, "loss": 0.6396, "step": 496 }, { "epoch": 0.048369829683698295, "grad_norm": 1.6658790952812916, "learning_rate": 9.99122218195628e-06, "loss": 0.5429, "step": 497 }, { "epoch": 0.048467153284671535, "grad_norm": 1.6568038302360257, "learning_rate": 9.991128580256725e-06, "loss": 0.4532, "step": 498 }, { "epoch": 0.04856447688564477, "grad_norm": 1.8451659374514144, "learning_rate": 9.991034482585656e-06, "loss": 0.5845, "step": 499 }, { "epoch": 0.04866180048661801, "grad_norm": 1.9103948838029656, "learning_rate": 9.99093988895242e-06, "loss": 0.5508, "step": 500 }, { "epoch": 0.04875912408759124, "grad_norm": 1.9691733858712537, "learning_rate": 9.990844799366422e-06, "loss": 0.6374, "step": 501 }, { "epoch": 0.048856447688564474, "grad_norm": 2.1278472226161846, "learning_rate": 9.990749213837108e-06, "loss": 0.572, "step": 502 }, { "epoch": 0.04895377128953771, "grad_norm": 1.9704028865885994, "learning_rate": 9.990653132373977e-06, "loss": 0.6282, "step": 503 }, { "epoch": 0.049051094890510946, "grad_norm": 1.8965741341561362, "learning_rate": 9.990556554986577e-06, "loss": 0.5749, "step": 504 }, { "epoch": 0.049148418491484186, "grad_norm": 1.5425018763105707, "learning_rate": 9.990459481684504e-06, "loss": 0.4236, "step": 505 }, { "epoch": 0.04924574209245742, "grad_norm": 1.736669998068125, "learning_rate": 9.990361912477405e-06, "loss": 0.4275, "step": 506 }, { "epoch": 0.04934306569343066, "grad_norm": 2.049335776858506, "learning_rate": 9.990263847374976e-06, "loss": 0.6897, "step": 507 }, { "epoch": 0.04944038929440389, "grad_norm": 1.8544975871268152, "learning_rate": 9.990165286386961e-06, "loss": 0.4811, "step": 508 }, { "epoch": 0.04953771289537713, "grad_norm": 1.5709178763522822, "learning_rate": 9.990066229523155e-06, "loss": 0.4585, "step": 509 }, { "epoch": 0.049635036496350364, "grad_norm": 2.1410068811754153, "learning_rate": 9.989966676793399e-06, "loss": 0.4773, "step": 510 }, { "epoch": 0.049732360097323604, "grad_norm": 1.760724042734433, "learning_rate": 9.989866628207589e-06, "loss": 0.3144, "step": 511 }, { "epoch": 0.04982968369829684, "grad_norm": 1.8521560168370175, "learning_rate": 9.989766083775662e-06, "loss": 0.4656, "step": 512 }, { "epoch": 0.049927007299270076, "grad_norm": 1.544987615640627, "learning_rate": 9.989665043507616e-06, "loss": 0.4089, "step": 513 }, { "epoch": 0.05002433090024331, "grad_norm": 1.9122960249889975, "learning_rate": 9.989563507413487e-06, "loss": 0.4535, "step": 514 }, { "epoch": 0.05012165450121654, "grad_norm": 1.5187134098621655, "learning_rate": 9.989461475503363e-06, "loss": 0.31, "step": 515 }, { "epoch": 0.05021897810218978, "grad_norm": 1.562160455050312, "learning_rate": 9.989358947787389e-06, "loss": 0.4009, "step": 516 }, { "epoch": 0.050316301703163015, "grad_norm": 1.738084966314413, "learning_rate": 9.989255924275746e-06, "loss": 0.4723, "step": 517 }, { "epoch": 0.050413625304136254, "grad_norm": 2.156580581755068, "learning_rate": 9.989152404978678e-06, "loss": 0.4407, "step": 518 }, { "epoch": 0.05051094890510949, "grad_norm": 1.8652302207700793, "learning_rate": 9.989048389906469e-06, "loss": 0.587, "step": 519 }, { "epoch": 0.05060827250608273, "grad_norm": 1.5934369396830426, "learning_rate": 9.988943879069452e-06, "loss": 0.3961, "step": 520 }, { "epoch": 0.05070559610705596, "grad_norm": 1.4294562647861604, "learning_rate": 9.988838872478017e-06, "loss": 0.3382, "step": 521 }, { "epoch": 0.0508029197080292, "grad_norm": 1.5693240874435923, "learning_rate": 9.988733370142598e-06, "loss": 0.3876, "step": 522 }, { "epoch": 0.05090024330900243, "grad_norm": 1.6720738515514542, "learning_rate": 9.988627372073678e-06, "loss": 0.448, "step": 523 }, { "epoch": 0.05099756690997567, "grad_norm": 2.0438207304961367, "learning_rate": 9.988520878281787e-06, "loss": 0.5724, "step": 524 }, { "epoch": 0.051094890510948905, "grad_norm": 2.0003463921985456, "learning_rate": 9.988413888777512e-06, "loss": 0.4506, "step": 525 }, { "epoch": 0.05119221411192214, "grad_norm": 2.11812759304704, "learning_rate": 9.988306403571482e-06, "loss": 0.757, "step": 526 }, { "epoch": 0.05128953771289538, "grad_norm": 1.5594386055307068, "learning_rate": 9.98819842267438e-06, "loss": 0.4145, "step": 527 }, { "epoch": 0.05138686131386861, "grad_norm": 1.917978943216931, "learning_rate": 9.988089946096933e-06, "loss": 0.5363, "step": 528 }, { "epoch": 0.05148418491484185, "grad_norm": 1.3212282063862113, "learning_rate": 9.987980973849924e-06, "loss": 0.3132, "step": 529 }, { "epoch": 0.05158150851581508, "grad_norm": 1.2285769982465171, "learning_rate": 9.987871505944177e-06, "loss": 0.2287, "step": 530 }, { "epoch": 0.05167883211678832, "grad_norm": 1.849610792922833, "learning_rate": 9.987761542390574e-06, "loss": 0.6487, "step": 531 }, { "epoch": 0.051776155717761556, "grad_norm": 1.158461389164102, "learning_rate": 9.987651083200044e-06, "loss": 0.2111, "step": 532 }, { "epoch": 0.051873479318734796, "grad_norm": 1.8450520976911682, "learning_rate": 9.987540128383556e-06, "loss": 0.5579, "step": 533 }, { "epoch": 0.05197080291970803, "grad_norm": 1.9047794610871986, "learning_rate": 9.98742867795214e-06, "loss": 0.4542, "step": 534 }, { "epoch": 0.05206812652068127, "grad_norm": 1.5564676952152843, "learning_rate": 9.987316731916872e-06, "loss": 0.4467, "step": 535 }, { "epoch": 0.0521654501216545, "grad_norm": 1.403952395827601, "learning_rate": 9.987204290288876e-06, "loss": 0.3761, "step": 536 }, { "epoch": 0.052262773722627734, "grad_norm": 1.948151749349848, "learning_rate": 9.987091353079323e-06, "loss": 0.5782, "step": 537 }, { "epoch": 0.052360097323600974, "grad_norm": 1.6211222818460531, "learning_rate": 9.986977920299437e-06, "loss": 0.4047, "step": 538 }, { "epoch": 0.052457420924574207, "grad_norm": 1.4911900726837217, "learning_rate": 9.986863991960491e-06, "loss": 0.3817, "step": 539 }, { "epoch": 0.052554744525547446, "grad_norm": 1.530872687739145, "learning_rate": 9.986749568073804e-06, "loss": 0.4639, "step": 540 }, { "epoch": 0.05265206812652068, "grad_norm": 1.766399180057757, "learning_rate": 9.986634648650746e-06, "loss": 0.5132, "step": 541 }, { "epoch": 0.05274939172749392, "grad_norm": 1.7318370911583716, "learning_rate": 9.98651923370274e-06, "loss": 0.5845, "step": 542 }, { "epoch": 0.05284671532846715, "grad_norm": 1.4523428175637472, "learning_rate": 9.986403323241252e-06, "loss": 0.3817, "step": 543 }, { "epoch": 0.05294403892944039, "grad_norm": 1.3085205057626972, "learning_rate": 9.9862869172778e-06, "loss": 0.294, "step": 544 }, { "epoch": 0.053041362530413624, "grad_norm": 1.749260064779093, "learning_rate": 9.986170015823953e-06, "loss": 0.3885, "step": 545 }, { "epoch": 0.053138686131386864, "grad_norm": 1.9224820302612053, "learning_rate": 9.986052618891326e-06, "loss": 0.5841, "step": 546 }, { "epoch": 0.0532360097323601, "grad_norm": 1.6019594770490224, "learning_rate": 9.985934726491587e-06, "loss": 0.5602, "step": 547 }, { "epoch": 0.05333333333333334, "grad_norm": 1.63788543125369, "learning_rate": 9.98581633863645e-06, "loss": 0.4913, "step": 548 }, { "epoch": 0.05343065693430657, "grad_norm": 1.7751230304686407, "learning_rate": 9.985697455337677e-06, "loss": 0.4575, "step": 549 }, { "epoch": 0.0535279805352798, "grad_norm": 1.4813830287768246, "learning_rate": 9.985578076607086e-06, "loss": 0.2811, "step": 550 }, { "epoch": 0.05362530413625304, "grad_norm": 1.8047180833743464, "learning_rate": 9.985458202456534e-06, "loss": 0.5564, "step": 551 }, { "epoch": 0.053722627737226275, "grad_norm": 1.4776771818705197, "learning_rate": 9.985337832897938e-06, "loss": 0.2842, "step": 552 }, { "epoch": 0.053819951338199515, "grad_norm": 1.800973083472876, "learning_rate": 9.985216967943256e-06, "loss": 0.4017, "step": 553 }, { "epoch": 0.05391727493917275, "grad_norm": 1.4167019147788764, "learning_rate": 9.985095607604502e-06, "loss": 0.2676, "step": 554 }, { "epoch": 0.05401459854014599, "grad_norm": 1.462279330828973, "learning_rate": 9.984973751893732e-06, "loss": 0.342, "step": 555 }, { "epoch": 0.05411192214111922, "grad_norm": 1.7941608662857766, "learning_rate": 9.984851400823056e-06, "loss": 0.4851, "step": 556 }, { "epoch": 0.05420924574209246, "grad_norm": 1.865163176610701, "learning_rate": 9.984728554404632e-06, "loss": 0.5938, "step": 557 }, { "epoch": 0.05430656934306569, "grad_norm": 1.9578700904261006, "learning_rate": 9.984605212650669e-06, "loss": 0.5846, "step": 558 }, { "epoch": 0.05440389294403893, "grad_norm": 1.7615345522382602, "learning_rate": 9.98448137557342e-06, "loss": 0.5517, "step": 559 }, { "epoch": 0.054501216545012166, "grad_norm": 1.7987507193579173, "learning_rate": 9.984357043185195e-06, "loss": 0.4511, "step": 560 }, { "epoch": 0.0545985401459854, "grad_norm": 1.8966136067258859, "learning_rate": 9.984232215498347e-06, "loss": 0.3339, "step": 561 }, { "epoch": 0.05469586374695864, "grad_norm": 1.760439118311743, "learning_rate": 9.98410689252528e-06, "loss": 0.4797, "step": 562 }, { "epoch": 0.05479318734793187, "grad_norm": 1.7467534741216573, "learning_rate": 9.983981074278448e-06, "loss": 0.3854, "step": 563 }, { "epoch": 0.05489051094890511, "grad_norm": 1.638747457914032, "learning_rate": 9.983854760770353e-06, "loss": 0.3215, "step": 564 }, { "epoch": 0.054987834549878344, "grad_norm": 1.565721167011275, "learning_rate": 9.983727952013546e-06, "loss": 0.3573, "step": 565 }, { "epoch": 0.05508515815085158, "grad_norm": 1.819373023432736, "learning_rate": 9.98360064802063e-06, "loss": 0.304, "step": 566 }, { "epoch": 0.055182481751824816, "grad_norm": 2.219648367380945, "learning_rate": 9.983472848804254e-06, "loss": 0.7398, "step": 567 }, { "epoch": 0.055279805352798056, "grad_norm": 1.7935096739228122, "learning_rate": 9.98334455437712e-06, "loss": 0.3257, "step": 568 }, { "epoch": 0.05537712895377129, "grad_norm": 2.085379879601924, "learning_rate": 9.983215764751971e-06, "loss": 0.3477, "step": 569 }, { "epoch": 0.05547445255474453, "grad_norm": 1.528881264990704, "learning_rate": 9.98308647994161e-06, "loss": 0.4173, "step": 570 }, { "epoch": 0.05557177615571776, "grad_norm": 1.282510416609492, "learning_rate": 9.982956699958883e-06, "loss": 0.3513, "step": 571 }, { "epoch": 0.055669099756690994, "grad_norm": 1.6035600811723405, "learning_rate": 9.982826424816688e-06, "loss": 0.3318, "step": 572 }, { "epoch": 0.055766423357664234, "grad_norm": 1.9455996381881653, "learning_rate": 9.982695654527966e-06, "loss": 0.4991, "step": 573 }, { "epoch": 0.05586374695863747, "grad_norm": 1.8397262762514839, "learning_rate": 9.982564389105714e-06, "loss": 0.345, "step": 574 }, { "epoch": 0.05596107055961071, "grad_norm": 1.7997461351876956, "learning_rate": 9.982432628562978e-06, "loss": 0.5384, "step": 575 }, { "epoch": 0.05605839416058394, "grad_norm": 1.6246101205121968, "learning_rate": 9.982300372912848e-06, "loss": 0.5499, "step": 576 }, { "epoch": 0.05615571776155718, "grad_norm": 1.9184631207748861, "learning_rate": 9.982167622168467e-06, "loss": 0.449, "step": 577 }, { "epoch": 0.05625304136253041, "grad_norm": 1.5368079698239796, "learning_rate": 9.982034376343029e-06, "loss": 0.3311, "step": 578 }, { "epoch": 0.05635036496350365, "grad_norm": 1.9061539422519105, "learning_rate": 9.98190063544977e-06, "loss": 0.4182, "step": 579 }, { "epoch": 0.056447688564476885, "grad_norm": 1.6727227174184238, "learning_rate": 9.981766399501984e-06, "loss": 0.482, "step": 580 }, { "epoch": 0.056545012165450124, "grad_norm": 1.8546055763617424, "learning_rate": 9.98163166851301e-06, "loss": 0.5758, "step": 581 }, { "epoch": 0.05664233576642336, "grad_norm": 2.0350303098403706, "learning_rate": 9.981496442496234e-06, "loss": 0.5236, "step": 582 }, { "epoch": 0.0567396593673966, "grad_norm": 1.3907379790284926, "learning_rate": 9.981360721465095e-06, "loss": 0.3375, "step": 583 }, { "epoch": 0.05683698296836983, "grad_norm": 2.0168702766261486, "learning_rate": 9.98122450543308e-06, "loss": 0.595, "step": 584 }, { "epoch": 0.05693430656934306, "grad_norm": 1.7248754760467295, "learning_rate": 9.981087794413722e-06, "loss": 0.3747, "step": 585 }, { "epoch": 0.0570316301703163, "grad_norm": 1.8918865818240052, "learning_rate": 9.98095058842061e-06, "loss": 0.5805, "step": 586 }, { "epoch": 0.057128953771289535, "grad_norm": 1.8691153689026438, "learning_rate": 9.980812887467377e-06, "loss": 0.3451, "step": 587 }, { "epoch": 0.057226277372262775, "grad_norm": 1.7475224395533677, "learning_rate": 9.980674691567705e-06, "loss": 0.2789, "step": 588 }, { "epoch": 0.05732360097323601, "grad_norm": 1.876124489873064, "learning_rate": 9.980536000735328e-06, "loss": 0.5917, "step": 589 }, { "epoch": 0.05742092457420925, "grad_norm": 1.6438847446693803, "learning_rate": 9.980396814984025e-06, "loss": 0.3063, "step": 590 }, { "epoch": 0.05751824817518248, "grad_norm": 1.7609146888426583, "learning_rate": 9.980257134327634e-06, "loss": 0.4177, "step": 591 }, { "epoch": 0.05761557177615572, "grad_norm": 3.1047413099950445, "learning_rate": 9.980116958780027e-06, "loss": 0.2793, "step": 592 }, { "epoch": 0.05771289537712895, "grad_norm": 1.3365913263494138, "learning_rate": 9.979976288355137e-06, "loss": 0.2754, "step": 593 }, { "epoch": 0.05781021897810219, "grad_norm": 1.7378721977452198, "learning_rate": 9.979835123066943e-06, "loss": 0.4156, "step": 594 }, { "epoch": 0.057907542579075426, "grad_norm": 1.7652517953930271, "learning_rate": 9.979693462929472e-06, "loss": 0.3768, "step": 595 }, { "epoch": 0.05800486618004866, "grad_norm": 2.4155692425963675, "learning_rate": 9.979551307956801e-06, "loss": 0.6409, "step": 596 }, { "epoch": 0.0581021897810219, "grad_norm": 2.2339995809091913, "learning_rate": 9.979408658163055e-06, "loss": 0.3134, "step": 597 }, { "epoch": 0.05819951338199513, "grad_norm": 1.9788468018769068, "learning_rate": 9.97926551356241e-06, "loss": 0.2509, "step": 598 }, { "epoch": 0.05829683698296837, "grad_norm": 4.0668515887714385, "learning_rate": 9.979121874169091e-06, "loss": 0.3322, "step": 599 }, { "epoch": 0.058394160583941604, "grad_norm": 2.0552497355613264, "learning_rate": 9.97897773999737e-06, "loss": 0.2732, "step": 600 }, { "epoch": 0.058491484184914844, "grad_norm": 1.7372746328291984, "learning_rate": 9.978833111061573e-06, "loss": 0.3021, "step": 601 }, { "epoch": 0.058588807785888077, "grad_norm": 1.8426989129405926, "learning_rate": 9.978687987376067e-06, "loss": 0.3147, "step": 602 }, { "epoch": 0.058686131386861316, "grad_norm": 1.456816302033054, "learning_rate": 9.978542368955278e-06, "loss": 0.3669, "step": 603 }, { "epoch": 0.05878345498783455, "grad_norm": 2.1398878847147973, "learning_rate": 9.978396255813672e-06, "loss": 0.457, "step": 604 }, { "epoch": 0.05888077858880779, "grad_norm": 1.860652260495742, "learning_rate": 9.978249647965769e-06, "loss": 0.5567, "step": 605 }, { "epoch": 0.05897810218978102, "grad_norm": 1.7559525207322428, "learning_rate": 9.97810254542614e-06, "loss": 0.439, "step": 606 }, { "epoch": 0.059075425790754255, "grad_norm": 1.4912680944094816, "learning_rate": 9.977954948209402e-06, "loss": 0.4431, "step": 607 }, { "epoch": 0.059172749391727494, "grad_norm": 1.766690700595448, "learning_rate": 9.97780685633022e-06, "loss": 0.3187, "step": 608 }, { "epoch": 0.05927007299270073, "grad_norm": 2.169180646458804, "learning_rate": 9.977658269803312e-06, "loss": 0.5042, "step": 609 }, { "epoch": 0.05936739659367397, "grad_norm": 1.623119439845207, "learning_rate": 9.977509188643441e-06, "loss": 0.3632, "step": 610 }, { "epoch": 0.0594647201946472, "grad_norm": 2.0976883017366226, "learning_rate": 9.977359612865424e-06, "loss": 0.6465, "step": 611 }, { "epoch": 0.05956204379562044, "grad_norm": 1.59126192242755, "learning_rate": 9.977209542484123e-06, "loss": 0.4335, "step": 612 }, { "epoch": 0.05965936739659367, "grad_norm": 1.6532378246551842, "learning_rate": 9.97705897751445e-06, "loss": 0.3462, "step": 613 }, { "epoch": 0.05975669099756691, "grad_norm": 1.6478059833585124, "learning_rate": 9.976907917971365e-06, "loss": 0.4063, "step": 614 }, { "epoch": 0.059854014598540145, "grad_norm": 1.750559308727237, "learning_rate": 9.976756363869884e-06, "loss": 0.5062, "step": 615 }, { "epoch": 0.059951338199513385, "grad_norm": 1.6400113365898012, "learning_rate": 9.976604315225063e-06, "loss": 0.3699, "step": 616 }, { "epoch": 0.06004866180048662, "grad_norm": 1.5449283565959169, "learning_rate": 9.976451772052013e-06, "loss": 0.3635, "step": 617 }, { "epoch": 0.06014598540145986, "grad_norm": 1.3799772345005799, "learning_rate": 9.97629873436589e-06, "loss": 0.2747, "step": 618 }, { "epoch": 0.06024330900243309, "grad_norm": 1.9454941262632244, "learning_rate": 9.976145202181905e-06, "loss": 0.4963, "step": 619 }, { "epoch": 0.06034063260340632, "grad_norm": 1.5274916477255973, "learning_rate": 9.975991175515311e-06, "loss": 0.3348, "step": 620 }, { "epoch": 0.06043795620437956, "grad_norm": 1.9623540496009142, "learning_rate": 9.975836654381416e-06, "loss": 0.5373, "step": 621 }, { "epoch": 0.060535279805352796, "grad_norm": 1.4248144765181632, "learning_rate": 9.975681638795575e-06, "loss": 0.3137, "step": 622 }, { "epoch": 0.060632603406326036, "grad_norm": 1.4366236793713136, "learning_rate": 9.975526128773192e-06, "loss": 0.3519, "step": 623 }, { "epoch": 0.06072992700729927, "grad_norm": 1.8458441140553945, "learning_rate": 9.97537012432972e-06, "loss": 0.3937, "step": 624 }, { "epoch": 0.06082725060827251, "grad_norm": 1.868271580826056, "learning_rate": 9.975213625480658e-06, "loss": 0.4567, "step": 625 }, { "epoch": 0.06092457420924574, "grad_norm": 2.4613001964869223, "learning_rate": 9.97505663224156e-06, "loss": 0.5607, "step": 626 }, { "epoch": 0.06102189781021898, "grad_norm": 1.6709839772769468, "learning_rate": 9.974899144628027e-06, "loss": 0.3233, "step": 627 }, { "epoch": 0.061119221411192214, "grad_norm": 1.8046591620263965, "learning_rate": 9.97474116265571e-06, "loss": 0.3929, "step": 628 }, { "epoch": 0.06121654501216545, "grad_norm": 1.7182161369033975, "learning_rate": 9.974582686340304e-06, "loss": 0.3804, "step": 629 }, { "epoch": 0.061313868613138686, "grad_norm": 2.435940855169524, "learning_rate": 9.974423715697558e-06, "loss": 0.7453, "step": 630 }, { "epoch": 0.06141119221411192, "grad_norm": 1.401143104634322, "learning_rate": 9.974264250743272e-06, "loss": 0.306, "step": 631 }, { "epoch": 0.06150851581508516, "grad_norm": 1.540550326071636, "learning_rate": 9.97410429149329e-06, "loss": 0.3582, "step": 632 }, { "epoch": 0.06160583941605839, "grad_norm": 4.038520112503673, "learning_rate": 9.973943837963507e-06, "loss": 0.2688, "step": 633 }, { "epoch": 0.06170316301703163, "grad_norm": 2.032927304778425, "learning_rate": 9.973782890169867e-06, "loss": 0.6952, "step": 634 }, { "epoch": 0.061800486618004864, "grad_norm": 1.5242884680104736, "learning_rate": 9.973621448128364e-06, "loss": 0.3957, "step": 635 }, { "epoch": 0.061897810218978104, "grad_norm": 1.599953340803732, "learning_rate": 9.973459511855042e-06, "loss": 0.3783, "step": 636 }, { "epoch": 0.06199513381995134, "grad_norm": 2.1886899708740697, "learning_rate": 9.973297081365988e-06, "loss": 0.5426, "step": 637 }, { "epoch": 0.06209245742092458, "grad_norm": 1.363421719809718, "learning_rate": 9.973134156677349e-06, "loss": 0.2707, "step": 638 }, { "epoch": 0.06218978102189781, "grad_norm": 1.883218491971664, "learning_rate": 9.972970737805312e-06, "loss": 0.543, "step": 639 }, { "epoch": 0.06228710462287105, "grad_norm": 1.6336178778276322, "learning_rate": 9.972806824766117e-06, "loss": 0.4833, "step": 640 }, { "epoch": 0.06238442822384428, "grad_norm": 1.74145478719615, "learning_rate": 9.972642417576049e-06, "loss": 0.5456, "step": 641 }, { "epoch": 0.062481751824817515, "grad_norm": 1.3939447959630629, "learning_rate": 9.972477516251448e-06, "loss": 0.2935, "step": 642 }, { "epoch": 0.06257907542579075, "grad_norm": 1.9741261661680443, "learning_rate": 9.9723121208087e-06, "loss": 0.4377, "step": 643 }, { "epoch": 0.06267639902676399, "grad_norm": 2.214700253529172, "learning_rate": 9.972146231264242e-06, "loss": 0.6711, "step": 644 }, { "epoch": 0.06277372262773723, "grad_norm": 1.7399845992974294, "learning_rate": 9.971979847634554e-06, "loss": 0.5327, "step": 645 }, { "epoch": 0.06287104622871047, "grad_norm": 1.3552365502663122, "learning_rate": 9.971812969936174e-06, "loss": 0.3553, "step": 646 }, { "epoch": 0.06296836982968369, "grad_norm": 1.8378075997453163, "learning_rate": 9.971645598185685e-06, "loss": 0.3709, "step": 647 }, { "epoch": 0.06306569343065693, "grad_norm": 1.7441350204189767, "learning_rate": 9.971477732399714e-06, "loss": 0.489, "step": 648 }, { "epoch": 0.06316301703163017, "grad_norm": 2.083031963167252, "learning_rate": 9.971309372594947e-06, "loss": 0.6196, "step": 649 }, { "epoch": 0.06326034063260341, "grad_norm": 1.5678236487001533, "learning_rate": 9.971140518788112e-06, "loss": 0.3202, "step": 650 }, { "epoch": 0.06335766423357664, "grad_norm": 1.7281008810115812, "learning_rate": 9.970971170995988e-06, "loss": 0.4169, "step": 651 }, { "epoch": 0.06345498783454988, "grad_norm": 1.5626981990993993, "learning_rate": 9.970801329235402e-06, "loss": 0.4238, "step": 652 }, { "epoch": 0.06355231143552312, "grad_norm": 1.5338214380715702, "learning_rate": 9.970630993523234e-06, "loss": 0.278, "step": 653 }, { "epoch": 0.06364963503649634, "grad_norm": 1.7806299033721755, "learning_rate": 9.970460163876409e-06, "loss": 0.5649, "step": 654 }, { "epoch": 0.06374695863746958, "grad_norm": 1.9349681554929028, "learning_rate": 9.9702888403119e-06, "loss": 0.3297, "step": 655 }, { "epoch": 0.06384428223844282, "grad_norm": 1.4947723050696704, "learning_rate": 9.970117022846736e-06, "loss": 0.4077, "step": 656 }, { "epoch": 0.06394160583941606, "grad_norm": 1.5696774237596223, "learning_rate": 9.96994471149799e-06, "loss": 0.4681, "step": 657 }, { "epoch": 0.06403892944038929, "grad_norm": 1.7662095984112474, "learning_rate": 9.969771906282781e-06, "loss": 0.539, "step": 658 }, { "epoch": 0.06413625304136253, "grad_norm": 2.926336951253308, "learning_rate": 9.969598607218285e-06, "loss": 0.4196, "step": 659 }, { "epoch": 0.06423357664233577, "grad_norm": 3.148192138198314, "learning_rate": 9.96942481432172e-06, "loss": 0.4827, "step": 660 }, { "epoch": 0.06433090024330901, "grad_norm": 1.790436662552377, "learning_rate": 9.969250527610356e-06, "loss": 0.4972, "step": 661 }, { "epoch": 0.06442822384428223, "grad_norm": 1.4712739725679773, "learning_rate": 9.969075747101514e-06, "loss": 0.4112, "step": 662 }, { "epoch": 0.06452554744525547, "grad_norm": 1.4521996617982842, "learning_rate": 9.96890047281256e-06, "loss": 0.3729, "step": 663 }, { "epoch": 0.06462287104622871, "grad_norm": 1.5457088814513262, "learning_rate": 9.96872470476091e-06, "loss": 0.4294, "step": 664 }, { "epoch": 0.06472019464720194, "grad_norm": 1.7644033340951866, "learning_rate": 9.968548442964034e-06, "loss": 0.4487, "step": 665 }, { "epoch": 0.06481751824817518, "grad_norm": 1.632555708701406, "learning_rate": 9.968371687439446e-06, "loss": 0.3929, "step": 666 }, { "epoch": 0.06491484184914842, "grad_norm": 1.8990302396780172, "learning_rate": 9.968194438204708e-06, "loss": 0.4101, "step": 667 }, { "epoch": 0.06501216545012166, "grad_norm": 2.092762728551112, "learning_rate": 9.968016695277436e-06, "loss": 0.5712, "step": 668 }, { "epoch": 0.06510948905109488, "grad_norm": 1.5876668887386824, "learning_rate": 9.967838458675292e-06, "loss": 0.494, "step": 669 }, { "epoch": 0.06520681265206812, "grad_norm": 1.7536517597940893, "learning_rate": 9.967659728415985e-06, "loss": 0.6121, "step": 670 }, { "epoch": 0.06530413625304136, "grad_norm": 1.9021294255711243, "learning_rate": 9.96748050451728e-06, "loss": 0.3634, "step": 671 }, { "epoch": 0.0654014598540146, "grad_norm": 1.4457078547633553, "learning_rate": 9.96730078699698e-06, "loss": 0.4586, "step": 672 }, { "epoch": 0.06549878345498783, "grad_norm": 1.6474950184261972, "learning_rate": 9.967120575872952e-06, "loss": 0.5028, "step": 673 }, { "epoch": 0.06559610705596107, "grad_norm": 1.9901979572232373, "learning_rate": 9.966939871163098e-06, "loss": 0.6986, "step": 674 }, { "epoch": 0.06569343065693431, "grad_norm": 1.3671458210722949, "learning_rate": 9.966758672885375e-06, "loss": 0.3945, "step": 675 }, { "epoch": 0.06579075425790755, "grad_norm": 1.8371332697903162, "learning_rate": 9.96657698105779e-06, "loss": 0.6782, "step": 676 }, { "epoch": 0.06588807785888078, "grad_norm": 1.1955013749239556, "learning_rate": 9.966394795698397e-06, "loss": 0.242, "step": 677 }, { "epoch": 0.06598540145985402, "grad_norm": 1.5330975344313047, "learning_rate": 9.966212116825302e-06, "loss": 0.4351, "step": 678 }, { "epoch": 0.06608272506082725, "grad_norm": 1.539581985713935, "learning_rate": 9.966028944456657e-06, "loss": 0.3512, "step": 679 }, { "epoch": 0.06618004866180048, "grad_norm": 1.9573455375443363, "learning_rate": 9.965845278610661e-06, "loss": 0.4859, "step": 680 }, { "epoch": 0.06627737226277372, "grad_norm": 1.8387055004344444, "learning_rate": 9.96566111930557e-06, "loss": 0.3831, "step": 681 }, { "epoch": 0.06637469586374696, "grad_norm": 1.7056154014174738, "learning_rate": 9.96547646655968e-06, "loss": 0.4675, "step": 682 }, { "epoch": 0.0664720194647202, "grad_norm": 1.881602931580563, "learning_rate": 9.965291320391342e-06, "loss": 0.5955, "step": 683 }, { "epoch": 0.06656934306569343, "grad_norm": 2.9885065529853416, "learning_rate": 9.965105680818955e-06, "loss": 0.393, "step": 684 }, { "epoch": 0.06666666666666667, "grad_norm": 1.7363492709096229, "learning_rate": 9.964919547860963e-06, "loss": 0.4903, "step": 685 }, { "epoch": 0.0667639902676399, "grad_norm": 1.8182376939684146, "learning_rate": 9.964732921535863e-06, "loss": 0.5443, "step": 686 }, { "epoch": 0.06686131386861315, "grad_norm": 1.6914779965026407, "learning_rate": 9.964545801862202e-06, "loss": 0.5119, "step": 687 }, { "epoch": 0.06695863746958637, "grad_norm": 1.2736843314571082, "learning_rate": 9.964358188858573e-06, "loss": 0.2495, "step": 688 }, { "epoch": 0.06705596107055961, "grad_norm": 1.5831736266585599, "learning_rate": 9.96417008254362e-06, "loss": 0.4489, "step": 689 }, { "epoch": 0.06715328467153285, "grad_norm": 2.2148297560046224, "learning_rate": 9.963981482936034e-06, "loss": 0.5415, "step": 690 }, { "epoch": 0.06725060827250608, "grad_norm": 1.5025934211262992, "learning_rate": 9.963792390054558e-06, "loss": 0.3903, "step": 691 }, { "epoch": 0.06734793187347932, "grad_norm": 1.4602374679322867, "learning_rate": 9.96360280391798e-06, "loss": 0.3199, "step": 692 }, { "epoch": 0.06744525547445256, "grad_norm": 1.5813416284844282, "learning_rate": 9.963412724545142e-06, "loss": 0.3213, "step": 693 }, { "epoch": 0.0675425790754258, "grad_norm": 1.246883512769049, "learning_rate": 9.96322215195493e-06, "loss": 0.2644, "step": 694 }, { "epoch": 0.06763990267639902, "grad_norm": 1.7094335347253355, "learning_rate": 9.963031086166282e-06, "loss": 0.4761, "step": 695 }, { "epoch": 0.06773722627737226, "grad_norm": 1.6516611118524773, "learning_rate": 9.962839527198184e-06, "loss": 0.4823, "step": 696 }, { "epoch": 0.0678345498783455, "grad_norm": 1.3531669839243998, "learning_rate": 9.962647475069672e-06, "loss": 0.4272, "step": 697 }, { "epoch": 0.06793187347931874, "grad_norm": 1.9430916606586504, "learning_rate": 9.962454929799829e-06, "loss": 0.5776, "step": 698 }, { "epoch": 0.06802919708029197, "grad_norm": 1.8772536403383466, "learning_rate": 9.962261891407792e-06, "loss": 0.6338, "step": 699 }, { "epoch": 0.0681265206812652, "grad_norm": 1.3972932620324034, "learning_rate": 9.96206835991274e-06, "loss": 0.3671, "step": 700 }, { "epoch": 0.06822384428223845, "grad_norm": 1.287329601381866, "learning_rate": 9.961874335333904e-06, "loss": 0.2744, "step": 701 }, { "epoch": 0.06832116788321167, "grad_norm": 1.5600519457751545, "learning_rate": 9.961679817690566e-06, "loss": 0.4433, "step": 702 }, { "epoch": 0.06841849148418491, "grad_norm": 1.3898736874388666, "learning_rate": 9.961484807002056e-06, "loss": 0.4197, "step": 703 }, { "epoch": 0.06851581508515815, "grad_norm": 1.672202746628868, "learning_rate": 9.961289303287749e-06, "loss": 0.4601, "step": 704 }, { "epoch": 0.06861313868613139, "grad_norm": 1.7427655274680753, "learning_rate": 9.961093306567076e-06, "loss": 0.5845, "step": 705 }, { "epoch": 0.06871046228710462, "grad_norm": 1.794570108008766, "learning_rate": 9.960896816859512e-06, "loss": 0.3459, "step": 706 }, { "epoch": 0.06880778588807786, "grad_norm": 1.6024314197975584, "learning_rate": 9.960699834184582e-06, "loss": 0.4441, "step": 707 }, { "epoch": 0.0689051094890511, "grad_norm": 1.619306935418848, "learning_rate": 9.960502358561858e-06, "loss": 0.4647, "step": 708 }, { "epoch": 0.06900243309002434, "grad_norm": 1.5009190604836247, "learning_rate": 9.960304390010968e-06, "loss": 0.373, "step": 709 }, { "epoch": 0.06909975669099756, "grad_norm": 1.8613999824223078, "learning_rate": 9.960105928551583e-06, "loss": 0.3926, "step": 710 }, { "epoch": 0.0691970802919708, "grad_norm": 2.8907340364253757, "learning_rate": 9.959906974203422e-06, "loss": 0.5451, "step": 711 }, { "epoch": 0.06929440389294404, "grad_norm": 1.826374356881247, "learning_rate": 9.959707526986256e-06, "loss": 0.4341, "step": 712 }, { "epoch": 0.06939172749391727, "grad_norm": 2.5001373253299133, "learning_rate": 9.959507586919903e-06, "loss": 0.6643, "step": 713 }, { "epoch": 0.06948905109489051, "grad_norm": 1.769427365923108, "learning_rate": 9.959307154024234e-06, "loss": 0.5431, "step": 714 }, { "epoch": 0.06958637469586375, "grad_norm": 2.3285358245695322, "learning_rate": 9.959106228319166e-06, "loss": 0.5274, "step": 715 }, { "epoch": 0.06968369829683699, "grad_norm": 1.4070234926508725, "learning_rate": 9.958904809824663e-06, "loss": 0.3257, "step": 716 }, { "epoch": 0.06978102189781021, "grad_norm": 1.9284568290872997, "learning_rate": 9.958702898560742e-06, "loss": 0.5648, "step": 717 }, { "epoch": 0.06987834549878345, "grad_norm": 2.092543866644565, "learning_rate": 9.958500494547465e-06, "loss": 0.6256, "step": 718 }, { "epoch": 0.0699756690997567, "grad_norm": 1.5948763588365042, "learning_rate": 9.958297597804947e-06, "loss": 0.4011, "step": 719 }, { "epoch": 0.07007299270072993, "grad_norm": 1.2246362905267065, "learning_rate": 9.958094208353348e-06, "loss": 0.2444, "step": 720 }, { "epoch": 0.07017031630170316, "grad_norm": 1.2302916868666773, "learning_rate": 9.95789032621288e-06, "loss": 0.3191, "step": 721 }, { "epoch": 0.0702676399026764, "grad_norm": 1.5504396768673763, "learning_rate": 9.957685951403803e-06, "loss": 0.3112, "step": 722 }, { "epoch": 0.07036496350364964, "grad_norm": 2.1205819146422438, "learning_rate": 9.957481083946427e-06, "loss": 0.3453, "step": 723 }, { "epoch": 0.07046228710462286, "grad_norm": 2.048519725880563, "learning_rate": 9.957275723861108e-06, "loss": 0.5266, "step": 724 }, { "epoch": 0.0705596107055961, "grad_norm": 1.4453693275620771, "learning_rate": 9.957069871168253e-06, "loss": 0.3082, "step": 725 }, { "epoch": 0.07065693430656934, "grad_norm": 1.8824931146868138, "learning_rate": 9.956863525888318e-06, "loss": 0.588, "step": 726 }, { "epoch": 0.07075425790754258, "grad_norm": 1.6143333569692804, "learning_rate": 9.956656688041807e-06, "loss": 0.4126, "step": 727 }, { "epoch": 0.07085158150851581, "grad_norm": 1.7905307392122496, "learning_rate": 9.956449357649276e-06, "loss": 0.521, "step": 728 }, { "epoch": 0.07094890510948905, "grad_norm": 1.3295021098228834, "learning_rate": 9.956241534731325e-06, "loss": 0.31, "step": 729 }, { "epoch": 0.07104622871046229, "grad_norm": 1.5783278835300563, "learning_rate": 9.956033219308607e-06, "loss": 0.3091, "step": 730 }, { "epoch": 0.07114355231143553, "grad_norm": 1.9905003004076265, "learning_rate": 9.955824411401822e-06, "loss": 0.3843, "step": 731 }, { "epoch": 0.07124087591240875, "grad_norm": 1.7644558301646922, "learning_rate": 9.955615111031717e-06, "loss": 0.4288, "step": 732 }, { "epoch": 0.071338199513382, "grad_norm": 1.5922207695027908, "learning_rate": 9.955405318219096e-06, "loss": 0.4767, "step": 733 }, { "epoch": 0.07143552311435523, "grad_norm": 1.7054240956141933, "learning_rate": 9.955195032984798e-06, "loss": 0.4082, "step": 734 }, { "epoch": 0.07153284671532846, "grad_norm": 1.3954970063738148, "learning_rate": 9.954984255349729e-06, "loss": 0.318, "step": 735 }, { "epoch": 0.0716301703163017, "grad_norm": 1.7287069268697828, "learning_rate": 9.954772985334825e-06, "loss": 0.4998, "step": 736 }, { "epoch": 0.07172749391727494, "grad_norm": 1.4535895804720915, "learning_rate": 9.954561222961086e-06, "loss": 0.2489, "step": 737 }, { "epoch": 0.07182481751824818, "grad_norm": 1.7113518757446542, "learning_rate": 9.954348968249552e-06, "loss": 0.4578, "step": 738 }, { "epoch": 0.0719221411192214, "grad_norm": 1.6741613993254088, "learning_rate": 9.954136221221316e-06, "loss": 0.4907, "step": 739 }, { "epoch": 0.07201946472019465, "grad_norm": 1.590982465166657, "learning_rate": 9.95392298189752e-06, "loss": 0.4116, "step": 740 }, { "epoch": 0.07211678832116789, "grad_norm": 1.422974716648181, "learning_rate": 9.953709250299351e-06, "loss": 0.3501, "step": 741 }, { "epoch": 0.07221411192214112, "grad_norm": 1.8424007198547667, "learning_rate": 9.953495026448048e-06, "loss": 0.5647, "step": 742 }, { "epoch": 0.07231143552311435, "grad_norm": 1.6572484299897867, "learning_rate": 9.953280310364902e-06, "loss": 0.3937, "step": 743 }, { "epoch": 0.07240875912408759, "grad_norm": 1.6027770112754065, "learning_rate": 9.953065102071245e-06, "loss": 0.3845, "step": 744 }, { "epoch": 0.07250608272506083, "grad_norm": 1.3618658637431431, "learning_rate": 9.952849401588464e-06, "loss": 0.3946, "step": 745 }, { "epoch": 0.07260340632603407, "grad_norm": 1.63075572158439, "learning_rate": 9.952633208937997e-06, "loss": 0.4506, "step": 746 }, { "epoch": 0.0727007299270073, "grad_norm": 1.483187632244976, "learning_rate": 9.95241652414132e-06, "loss": 0.3908, "step": 747 }, { "epoch": 0.07279805352798054, "grad_norm": 2.147960263046311, "learning_rate": 9.952199347219972e-06, "loss": 0.5249, "step": 748 }, { "epoch": 0.07289537712895378, "grad_norm": 1.5046941105429004, "learning_rate": 9.951981678195529e-06, "loss": 0.3592, "step": 749 }, { "epoch": 0.072992700729927, "grad_norm": 1.1457618113072725, "learning_rate": 9.951763517089624e-06, "loss": 0.2197, "step": 750 }, { "epoch": 0.07309002433090024, "grad_norm": 1.9275946136488011, "learning_rate": 9.951544863923934e-06, "loss": 0.5692, "step": 751 }, { "epoch": 0.07318734793187348, "grad_norm": 1.9590929330277462, "learning_rate": 9.95132571872019e-06, "loss": 0.7243, "step": 752 }, { "epoch": 0.07328467153284672, "grad_norm": 2.1368780826391283, "learning_rate": 9.951106081500162e-06, "loss": 0.7601, "step": 753 }, { "epoch": 0.07338199513381995, "grad_norm": 2.0085695969306396, "learning_rate": 9.950885952285682e-06, "loss": 0.5541, "step": 754 }, { "epoch": 0.07347931873479319, "grad_norm": 1.9283983503616706, "learning_rate": 9.950665331098622e-06, "loss": 0.3832, "step": 755 }, { "epoch": 0.07357664233576643, "grad_norm": 1.4173732379297153, "learning_rate": 9.950444217960902e-06, "loss": 0.379, "step": 756 }, { "epoch": 0.07367396593673967, "grad_norm": 1.5015176407129935, "learning_rate": 9.9502226128945e-06, "loss": 0.4696, "step": 757 }, { "epoch": 0.07377128953771289, "grad_norm": 1.6746905852394565, "learning_rate": 9.950000515921434e-06, "loss": 0.2984, "step": 758 }, { "epoch": 0.07386861313868613, "grad_norm": 1.4429847737048944, "learning_rate": 9.949777927063776e-06, "loss": 0.3748, "step": 759 }, { "epoch": 0.07396593673965937, "grad_norm": 1.1895632638034424, "learning_rate": 9.94955484634364e-06, "loss": 0.3014, "step": 760 }, { "epoch": 0.0740632603406326, "grad_norm": 1.5497241513071458, "learning_rate": 9.949331273783198e-06, "loss": 0.5458, "step": 761 }, { "epoch": 0.07416058394160584, "grad_norm": 1.5531214201672936, "learning_rate": 9.949107209404664e-06, "loss": 0.4575, "step": 762 }, { "epoch": 0.07425790754257908, "grad_norm": 1.3336107839559097, "learning_rate": 9.948882653230306e-06, "loss": 0.4227, "step": 763 }, { "epoch": 0.07435523114355232, "grad_norm": 1.7418209768074853, "learning_rate": 9.948657605282437e-06, "loss": 0.659, "step": 764 }, { "epoch": 0.07445255474452554, "grad_norm": 1.462439433090815, "learning_rate": 9.94843206558342e-06, "loss": 0.445, "step": 765 }, { "epoch": 0.07454987834549878, "grad_norm": 1.0856086178050317, "learning_rate": 9.948206034155666e-06, "loss": 0.2245, "step": 766 }, { "epoch": 0.07464720194647202, "grad_norm": 1.458503858496447, "learning_rate": 9.947979511021638e-06, "loss": 0.3009, "step": 767 }, { "epoch": 0.07474452554744526, "grad_norm": 1.1921292471996519, "learning_rate": 9.947752496203844e-06, "loss": 0.2988, "step": 768 }, { "epoch": 0.07484184914841849, "grad_norm": 1.6693024138876786, "learning_rate": 9.947524989724844e-06, "loss": 0.4783, "step": 769 }, { "epoch": 0.07493917274939173, "grad_norm": 1.4928671202909605, "learning_rate": 9.947296991607244e-06, "loss": 0.4161, "step": 770 }, { "epoch": 0.07503649635036497, "grad_norm": 1.4549005796935413, "learning_rate": 9.947068501873702e-06, "loss": 0.4186, "step": 771 }, { "epoch": 0.0751338199513382, "grad_norm": 1.7544781744298734, "learning_rate": 9.946839520546923e-06, "loss": 0.5593, "step": 772 }, { "epoch": 0.07523114355231143, "grad_norm": 1.561541454027553, "learning_rate": 9.946610047649659e-06, "loss": 0.5097, "step": 773 }, { "epoch": 0.07532846715328467, "grad_norm": 1.598616630831168, "learning_rate": 9.946380083204714e-06, "loss": 0.3744, "step": 774 }, { "epoch": 0.07542579075425791, "grad_norm": 1.6915556597188157, "learning_rate": 9.94614962723494e-06, "loss": 0.439, "step": 775 }, { "epoch": 0.07552311435523114, "grad_norm": 1.220024420697048, "learning_rate": 9.945918679763237e-06, "loss": 0.2339, "step": 776 }, { "epoch": 0.07562043795620438, "grad_norm": 1.6061445238682988, "learning_rate": 9.945687240812556e-06, "loss": 0.4493, "step": 777 }, { "epoch": 0.07571776155717762, "grad_norm": 1.400813806243779, "learning_rate": 9.945455310405895e-06, "loss": 0.4513, "step": 778 }, { "epoch": 0.07581508515815086, "grad_norm": 1.753751480308555, "learning_rate": 9.945222888566298e-06, "loss": 0.5379, "step": 779 }, { "epoch": 0.07591240875912408, "grad_norm": 1.4421667558329163, "learning_rate": 9.944989975316862e-06, "loss": 0.4118, "step": 780 }, { "epoch": 0.07600973236009732, "grad_norm": 1.4411974086247974, "learning_rate": 9.944756570680733e-06, "loss": 0.3295, "step": 781 }, { "epoch": 0.07610705596107056, "grad_norm": 1.5545586767450623, "learning_rate": 9.944522674681107e-06, "loss": 0.4146, "step": 782 }, { "epoch": 0.07620437956204379, "grad_norm": 2.0019900434858084, "learning_rate": 9.944288287341222e-06, "loss": 0.4945, "step": 783 }, { "epoch": 0.07630170316301703, "grad_norm": 1.5834930071710975, "learning_rate": 9.944053408684371e-06, "loss": 0.3781, "step": 784 }, { "epoch": 0.07639902676399027, "grad_norm": 1.5272521164667598, "learning_rate": 9.943818038733894e-06, "loss": 0.3865, "step": 785 }, { "epoch": 0.07649635036496351, "grad_norm": 1.8005925077547513, "learning_rate": 9.94358217751318e-06, "loss": 0.3951, "step": 786 }, { "epoch": 0.07659367396593673, "grad_norm": 2.0471085276865995, "learning_rate": 9.943345825045664e-06, "loss": 0.6391, "step": 787 }, { "epoch": 0.07669099756690997, "grad_norm": 1.7893386028077656, "learning_rate": 9.943108981354839e-06, "loss": 0.6373, "step": 788 }, { "epoch": 0.07678832116788321, "grad_norm": 1.6529186502183046, "learning_rate": 9.942871646464234e-06, "loss": 0.4901, "step": 789 }, { "epoch": 0.07688564476885645, "grad_norm": 1.8449837387732961, "learning_rate": 9.942633820397436e-06, "loss": 0.4444, "step": 790 }, { "epoch": 0.07698296836982968, "grad_norm": 1.5278738521461448, "learning_rate": 9.942395503178077e-06, "loss": 0.3701, "step": 791 }, { "epoch": 0.07708029197080292, "grad_norm": 1.8197808533034088, "learning_rate": 9.942156694829838e-06, "loss": 0.6142, "step": 792 }, { "epoch": 0.07717761557177616, "grad_norm": 1.8496691201700692, "learning_rate": 9.941917395376452e-06, "loss": 0.2021, "step": 793 }, { "epoch": 0.07727493917274939, "grad_norm": 1.8762664332677217, "learning_rate": 9.941677604841696e-06, "loss": 0.6742, "step": 794 }, { "epoch": 0.07737226277372262, "grad_norm": 1.5933514264940258, "learning_rate": 9.9414373232494e-06, "loss": 0.5156, "step": 795 }, { "epoch": 0.07746958637469586, "grad_norm": 1.538651154827247, "learning_rate": 9.94119655062344e-06, "loss": 0.446, "step": 796 }, { "epoch": 0.0775669099756691, "grad_norm": 3.7300878200470926, "learning_rate": 9.94095528698774e-06, "loss": 0.2745, "step": 797 }, { "epoch": 0.07766423357664233, "grad_norm": 1.685774804326696, "learning_rate": 9.940713532366277e-06, "loss": 0.4236, "step": 798 }, { "epoch": 0.07776155717761557, "grad_norm": 1.2528388212678458, "learning_rate": 9.940471286783074e-06, "loss": 0.308, "step": 799 }, { "epoch": 0.07785888077858881, "grad_norm": 1.5082779398207746, "learning_rate": 9.940228550262203e-06, "loss": 0.4925, "step": 800 }, { "epoch": 0.07795620437956205, "grad_norm": 1.544326069333433, "learning_rate": 9.939985322827784e-06, "loss": 0.4341, "step": 801 }, { "epoch": 0.07805352798053528, "grad_norm": 1.4959220289677864, "learning_rate": 9.939741604503987e-06, "loss": 0.4548, "step": 802 }, { "epoch": 0.07815085158150852, "grad_norm": 1.682287714178995, "learning_rate": 9.93949739531503e-06, "loss": 0.5277, "step": 803 }, { "epoch": 0.07824817518248176, "grad_norm": 1.6519496438708445, "learning_rate": 9.93925269528518e-06, "loss": 0.3074, "step": 804 }, { "epoch": 0.07834549878345498, "grad_norm": 1.4379883641500402, "learning_rate": 9.939007504438756e-06, "loss": 0.3069, "step": 805 }, { "epoch": 0.07844282238442822, "grad_norm": 2.0644552037743793, "learning_rate": 9.93876182280012e-06, "loss": 0.4479, "step": 806 }, { "epoch": 0.07854014598540146, "grad_norm": 1.4791313310441092, "learning_rate": 9.938515650393685e-06, "loss": 0.4255, "step": 807 }, { "epoch": 0.0786374695863747, "grad_norm": 1.4280736600967436, "learning_rate": 9.938268987243914e-06, "loss": 0.466, "step": 808 }, { "epoch": 0.07873479318734793, "grad_norm": 1.610976672135659, "learning_rate": 9.93802183337532e-06, "loss": 0.4327, "step": 809 }, { "epoch": 0.07883211678832117, "grad_norm": 1.5447130604673693, "learning_rate": 9.93777418881246e-06, "loss": 0.4931, "step": 810 }, { "epoch": 0.0789294403892944, "grad_norm": 1.3831325957946852, "learning_rate": 9.937526053579944e-06, "loss": 0.3877, "step": 811 }, { "epoch": 0.07902676399026765, "grad_norm": 1.4247112282736865, "learning_rate": 9.93727742770243e-06, "loss": 0.4168, "step": 812 }, { "epoch": 0.07912408759124087, "grad_norm": 1.5074130304911886, "learning_rate": 9.937028311204624e-06, "loss": 0.4747, "step": 813 }, { "epoch": 0.07922141119221411, "grad_norm": 1.4955958242475926, "learning_rate": 9.936778704111278e-06, "loss": 0.2999, "step": 814 }, { "epoch": 0.07931873479318735, "grad_norm": 1.6038468607718186, "learning_rate": 9.9365286064472e-06, "loss": 0.4897, "step": 815 }, { "epoch": 0.07941605839416059, "grad_norm": 1.8040845780349017, "learning_rate": 9.93627801823724e-06, "loss": 0.6413, "step": 816 }, { "epoch": 0.07951338199513382, "grad_norm": 1.4598215502284355, "learning_rate": 9.936026939506298e-06, "loss": 0.3687, "step": 817 }, { "epoch": 0.07961070559610706, "grad_norm": 1.340412030499075, "learning_rate": 9.935775370279324e-06, "loss": 0.3833, "step": 818 }, { "epoch": 0.0797080291970803, "grad_norm": 1.6913032059853774, "learning_rate": 9.935523310581318e-06, "loss": 0.5857, "step": 819 }, { "epoch": 0.07980535279805352, "grad_norm": 1.9970663728185467, "learning_rate": 9.93527076043733e-06, "loss": 0.6843, "step": 820 }, { "epoch": 0.07990267639902676, "grad_norm": 1.4408921562941295, "learning_rate": 9.93501771987245e-06, "loss": 0.4385, "step": 821 }, { "epoch": 0.08, "grad_norm": 1.5184490203891443, "learning_rate": 9.934764188911827e-06, "loss": 0.4708, "step": 822 }, { "epoch": 0.08009732360097324, "grad_norm": 1.8501562903086661, "learning_rate": 9.934510167580654e-06, "loss": 0.6431, "step": 823 }, { "epoch": 0.08019464720194647, "grad_norm": 1.6997829158405129, "learning_rate": 9.934255655904172e-06, "loss": 0.5188, "step": 824 }, { "epoch": 0.08029197080291971, "grad_norm": 1.8510241792275326, "learning_rate": 9.934000653907674e-06, "loss": 0.5457, "step": 825 }, { "epoch": 0.08038929440389295, "grad_norm": 1.6853569692908912, "learning_rate": 9.933745161616498e-06, "loss": 0.5062, "step": 826 }, { "epoch": 0.08048661800486619, "grad_norm": 1.3066104263898661, "learning_rate": 9.93348917905603e-06, "loss": 0.404, "step": 827 }, { "epoch": 0.08058394160583941, "grad_norm": 1.2788244408859646, "learning_rate": 9.933232706251712e-06, "loss": 0.3253, "step": 828 }, { "epoch": 0.08068126520681265, "grad_norm": 2.2690800072126325, "learning_rate": 9.932975743229027e-06, "loss": 0.3405, "step": 829 }, { "epoch": 0.08077858880778589, "grad_norm": 1.9113871035353245, "learning_rate": 9.932718290013512e-06, "loss": 0.5989, "step": 830 }, { "epoch": 0.08087591240875912, "grad_norm": 1.3655256798283997, "learning_rate": 9.932460346630748e-06, "loss": 0.2942, "step": 831 }, { "epoch": 0.08097323600973236, "grad_norm": 1.5234864838378999, "learning_rate": 9.932201913106366e-06, "loss": 0.3913, "step": 832 }, { "epoch": 0.0810705596107056, "grad_norm": 1.3752195876516826, "learning_rate": 9.93194298946605e-06, "loss": 0.3293, "step": 833 }, { "epoch": 0.08116788321167884, "grad_norm": 1.4842622412969824, "learning_rate": 9.931683575735527e-06, "loss": 0.4157, "step": 834 }, { "epoch": 0.08126520681265206, "grad_norm": 4.003685207313109, "learning_rate": 9.931423671940577e-06, "loss": 0.3276, "step": 835 }, { "epoch": 0.0813625304136253, "grad_norm": 1.509943035011216, "learning_rate": 9.931163278107023e-06, "loss": 0.4045, "step": 836 }, { "epoch": 0.08145985401459854, "grad_norm": 1.4382523765338775, "learning_rate": 9.930902394260746e-06, "loss": 0.2709, "step": 837 }, { "epoch": 0.08155717761557178, "grad_norm": 1.4492711471586157, "learning_rate": 9.930641020427665e-06, "loss": 0.3957, "step": 838 }, { "epoch": 0.08165450121654501, "grad_norm": 1.7428876214187694, "learning_rate": 9.930379156633758e-06, "loss": 0.5257, "step": 839 }, { "epoch": 0.08175182481751825, "grad_norm": 1.5652514836380926, "learning_rate": 9.930116802905042e-06, "loss": 0.4948, "step": 840 }, { "epoch": 0.08184914841849149, "grad_norm": 2.4133112951540494, "learning_rate": 9.929853959267589e-06, "loss": 0.5455, "step": 841 }, { "epoch": 0.08194647201946471, "grad_norm": 1.4309460046419233, "learning_rate": 9.929590625747518e-06, "loss": 0.4057, "step": 842 }, { "epoch": 0.08204379562043795, "grad_norm": 1.0450296792009146, "learning_rate": 9.929326802370995e-06, "loss": 0.2332, "step": 843 }, { "epoch": 0.0821411192214112, "grad_norm": 1.1201933325217828, "learning_rate": 9.92906248916424e-06, "loss": 0.3264, "step": 844 }, { "epoch": 0.08223844282238443, "grad_norm": 1.6243579769967154, "learning_rate": 9.928797686153515e-06, "loss": 0.5385, "step": 845 }, { "epoch": 0.08233576642335766, "grad_norm": 1.3496069901220336, "learning_rate": 9.928532393365136e-06, "loss": 0.3875, "step": 846 }, { "epoch": 0.0824330900243309, "grad_norm": 1.4862888245769246, "learning_rate": 9.928266610825462e-06, "loss": 0.4493, "step": 847 }, { "epoch": 0.08253041362530414, "grad_norm": 1.8305160014899666, "learning_rate": 9.928000338560906e-06, "loss": 0.4582, "step": 848 }, { "epoch": 0.08262773722627738, "grad_norm": 1.642584946989029, "learning_rate": 9.927733576597926e-06, "loss": 0.3347, "step": 849 }, { "epoch": 0.0827250608272506, "grad_norm": 1.5413363162928122, "learning_rate": 9.927466324963033e-06, "loss": 0.4607, "step": 850 }, { "epoch": 0.08282238442822384, "grad_norm": 1.7093263469236866, "learning_rate": 9.927198583682784e-06, "loss": 0.5706, "step": 851 }, { "epoch": 0.08291970802919708, "grad_norm": 1.531714933227777, "learning_rate": 9.926930352783781e-06, "loss": 0.533, "step": 852 }, { "epoch": 0.08301703163017031, "grad_norm": 1.8181822267445191, "learning_rate": 9.926661632292683e-06, "loss": 0.5946, "step": 853 }, { "epoch": 0.08311435523114355, "grad_norm": 1.8304662465930317, "learning_rate": 9.926392422236189e-06, "loss": 0.3746, "step": 854 }, { "epoch": 0.08321167883211679, "grad_norm": 1.3135536142885351, "learning_rate": 9.926122722641051e-06, "loss": 0.429, "step": 855 }, { "epoch": 0.08330900243309003, "grad_norm": 1.714390027755308, "learning_rate": 9.925852533534071e-06, "loss": 0.6806, "step": 856 }, { "epoch": 0.08340632603406326, "grad_norm": 1.3399957064659453, "learning_rate": 9.925581854942099e-06, "loss": 0.2824, "step": 857 }, { "epoch": 0.0835036496350365, "grad_norm": 1.3705351036499993, "learning_rate": 9.925310686892026e-06, "loss": 0.3085, "step": 858 }, { "epoch": 0.08360097323600973, "grad_norm": 1.5064665959171673, "learning_rate": 9.925039029410807e-06, "loss": 0.4445, "step": 859 }, { "epoch": 0.08369829683698297, "grad_norm": 1.725614330530946, "learning_rate": 9.924766882525433e-06, "loss": 0.4704, "step": 860 }, { "epoch": 0.0837956204379562, "grad_norm": 1.765372064078189, "learning_rate": 9.924494246262944e-06, "loss": 0.6383, "step": 861 }, { "epoch": 0.08389294403892944, "grad_norm": 2.085503007877936, "learning_rate": 9.924221120650434e-06, "loss": 0.296, "step": 862 }, { "epoch": 0.08399026763990268, "grad_norm": 1.7898541160892734, "learning_rate": 9.923947505715046e-06, "loss": 0.5991, "step": 863 }, { "epoch": 0.0840875912408759, "grad_norm": 1.6476104975968628, "learning_rate": 9.923673401483968e-06, "loss": 0.4734, "step": 864 }, { "epoch": 0.08418491484184915, "grad_norm": 1.5502768976775265, "learning_rate": 9.923398807984439e-06, "loss": 0.2764, "step": 865 }, { "epoch": 0.08428223844282239, "grad_norm": 1.2398437846135097, "learning_rate": 9.923123725243744e-06, "loss": 0.2705, "step": 866 }, { "epoch": 0.08437956204379563, "grad_norm": 1.5290591078236662, "learning_rate": 9.922848153289217e-06, "loss": 0.4228, "step": 867 }, { "epoch": 0.08447688564476885, "grad_norm": 1.134889947118225, "learning_rate": 9.922572092148244e-06, "loss": 0.2953, "step": 868 }, { "epoch": 0.08457420924574209, "grad_norm": 1.6307620082274505, "learning_rate": 9.922295541848257e-06, "loss": 0.3363, "step": 869 }, { "epoch": 0.08467153284671533, "grad_norm": 1.373015271795792, "learning_rate": 9.922018502416736e-06, "loss": 0.3593, "step": 870 }, { "epoch": 0.08476885644768857, "grad_norm": 1.7500724096304088, "learning_rate": 9.921740973881211e-06, "loss": 0.5236, "step": 871 }, { "epoch": 0.0848661800486618, "grad_norm": 1.6167507595463353, "learning_rate": 9.92146295626926e-06, "loss": 0.5138, "step": 872 }, { "epoch": 0.08496350364963504, "grad_norm": 1.0398007401901226, "learning_rate": 9.92118444960851e-06, "loss": 0.295, "step": 873 }, { "epoch": 0.08506082725060828, "grad_norm": 1.4140920056378707, "learning_rate": 9.920905453926637e-06, "loss": 0.4192, "step": 874 }, { "epoch": 0.0851581508515815, "grad_norm": 1.8785238213855096, "learning_rate": 9.920625969251365e-06, "loss": 0.4228, "step": 875 }, { "epoch": 0.08525547445255474, "grad_norm": 1.719991686268608, "learning_rate": 9.920345995610465e-06, "loss": 0.5026, "step": 876 }, { "epoch": 0.08535279805352798, "grad_norm": 1.7112372148926476, "learning_rate": 9.92006553303176e-06, "loss": 0.3157, "step": 877 }, { "epoch": 0.08545012165450122, "grad_norm": 2.5105720144829116, "learning_rate": 9.919784581543117e-06, "loss": 0.4777, "step": 878 }, { "epoch": 0.08554744525547445, "grad_norm": 1.42848630379055, "learning_rate": 9.919503141172458e-06, "loss": 0.3998, "step": 879 }, { "epoch": 0.08564476885644769, "grad_norm": 1.4246136626839867, "learning_rate": 9.919221211947748e-06, "loss": 0.4415, "step": 880 }, { "epoch": 0.08574209245742093, "grad_norm": 1.939970471855472, "learning_rate": 9.918938793897002e-06, "loss": 0.5887, "step": 881 }, { "epoch": 0.08583941605839417, "grad_norm": 1.5467402852284964, "learning_rate": 9.918655887048285e-06, "loss": 0.3726, "step": 882 }, { "epoch": 0.08593673965936739, "grad_norm": 1.6261636529000345, "learning_rate": 9.918372491429708e-06, "loss": 0.3382, "step": 883 }, { "epoch": 0.08603406326034063, "grad_norm": 1.4859289768748727, "learning_rate": 9.918088607069434e-06, "loss": 0.4837, "step": 884 }, { "epoch": 0.08613138686131387, "grad_norm": 1.8534453271170916, "learning_rate": 9.917804233995673e-06, "loss": 0.5948, "step": 885 }, { "epoch": 0.08622871046228711, "grad_norm": 1.3491809126204122, "learning_rate": 9.917519372236684e-06, "loss": 0.381, "step": 886 }, { "epoch": 0.08632603406326034, "grad_norm": 1.4913268478302555, "learning_rate": 9.91723402182077e-06, "loss": 0.2872, "step": 887 }, { "epoch": 0.08642335766423358, "grad_norm": 1.5345667515291348, "learning_rate": 9.916948182776289e-06, "loss": 0.4426, "step": 888 }, { "epoch": 0.08652068126520682, "grad_norm": 1.9142340135608018, "learning_rate": 9.916661855131646e-06, "loss": 0.467, "step": 889 }, { "epoch": 0.08661800486618004, "grad_norm": 1.7451883652681546, "learning_rate": 9.916375038915291e-06, "loss": 0.3579, "step": 890 }, { "epoch": 0.08671532846715328, "grad_norm": 3.3675828599824618, "learning_rate": 9.916087734155728e-06, "loss": 0.3965, "step": 891 }, { "epoch": 0.08681265206812652, "grad_norm": 1.6430989821947144, "learning_rate": 9.915799940881504e-06, "loss": 0.5089, "step": 892 }, { "epoch": 0.08690997566909976, "grad_norm": 1.8434153107573372, "learning_rate": 9.915511659121219e-06, "loss": 0.6513, "step": 893 }, { "epoch": 0.08700729927007299, "grad_norm": 1.7259560464984558, "learning_rate": 9.91522288890352e-06, "loss": 0.5963, "step": 894 }, { "epoch": 0.08710462287104623, "grad_norm": 1.4417036209809253, "learning_rate": 9.9149336302571e-06, "loss": 0.4076, "step": 895 }, { "epoch": 0.08720194647201947, "grad_norm": 1.4565626930182671, "learning_rate": 9.914643883210704e-06, "loss": 0.3548, "step": 896 }, { "epoch": 0.08729927007299271, "grad_norm": 1.8286482885292266, "learning_rate": 9.914353647793126e-06, "loss": 0.5158, "step": 897 }, { "epoch": 0.08739659367396593, "grad_norm": 1.573235746781315, "learning_rate": 9.914062924033204e-06, "loss": 0.4804, "step": 898 }, { "epoch": 0.08749391727493917, "grad_norm": 1.7725042500734154, "learning_rate": 9.91377171195983e-06, "loss": 0.4037, "step": 899 }, { "epoch": 0.08759124087591241, "grad_norm": 1.5572801757524644, "learning_rate": 9.913480011601939e-06, "loss": 0.2757, "step": 900 }, { "epoch": 0.08768856447688564, "grad_norm": 1.690990088453521, "learning_rate": 9.91318782298852e-06, "loss": 0.624, "step": 901 }, { "epoch": 0.08778588807785888, "grad_norm": 1.5797017595834213, "learning_rate": 9.912895146148609e-06, "loss": 0.418, "step": 902 }, { "epoch": 0.08788321167883212, "grad_norm": 1.722754374021215, "learning_rate": 9.912601981111287e-06, "loss": 0.5991, "step": 903 }, { "epoch": 0.08798053527980536, "grad_norm": 1.2395740583484196, "learning_rate": 9.912308327905683e-06, "loss": 0.3632, "step": 904 }, { "epoch": 0.08807785888077858, "grad_norm": 1.8637568028899596, "learning_rate": 9.912014186560985e-06, "loss": 0.5766, "step": 905 }, { "epoch": 0.08817518248175182, "grad_norm": 1.8489319991981024, "learning_rate": 9.911719557106418e-06, "loss": 0.6834, "step": 906 }, { "epoch": 0.08827250608272506, "grad_norm": 1.6692858460733677, "learning_rate": 9.911424439571258e-06, "loss": 0.5067, "step": 907 }, { "epoch": 0.0883698296836983, "grad_norm": 1.4727605888984552, "learning_rate": 9.911128833984834e-06, "loss": 0.3141, "step": 908 }, { "epoch": 0.08846715328467153, "grad_norm": 1.644393806422472, "learning_rate": 9.910832740376518e-06, "loss": 0.4599, "step": 909 }, { "epoch": 0.08856447688564477, "grad_norm": 1.730275300452632, "learning_rate": 9.910536158775734e-06, "loss": 0.3908, "step": 910 }, { "epoch": 0.08866180048661801, "grad_norm": 1.7281903494262714, "learning_rate": 9.910239089211955e-06, "loss": 0.5919, "step": 911 }, { "epoch": 0.08875912408759123, "grad_norm": 1.7234172913238917, "learning_rate": 9.909941531714699e-06, "loss": 0.609, "step": 912 }, { "epoch": 0.08885644768856447, "grad_norm": 1.4594702058569258, "learning_rate": 9.909643486313533e-06, "loss": 0.4399, "step": 913 }, { "epoch": 0.08895377128953771, "grad_norm": 1.4625782448468165, "learning_rate": 9.90934495303808e-06, "loss": 0.4011, "step": 914 }, { "epoch": 0.08905109489051095, "grad_norm": 1.7262645481609784, "learning_rate": 9.909045931918e-06, "loss": 0.4992, "step": 915 }, { "epoch": 0.08914841849148418, "grad_norm": 1.6255222361700263, "learning_rate": 9.908746422983007e-06, "loss": 0.4909, "step": 916 }, { "epoch": 0.08924574209245742, "grad_norm": 1.7512982185254946, "learning_rate": 9.908446426262865e-06, "loss": 0.5527, "step": 917 }, { "epoch": 0.08934306569343066, "grad_norm": 1.617605772613541, "learning_rate": 9.908145941787386e-06, "loss": 0.3228, "step": 918 }, { "epoch": 0.0894403892944039, "grad_norm": 1.489706963519404, "learning_rate": 9.907844969586427e-06, "loss": 0.4838, "step": 919 }, { "epoch": 0.08953771289537713, "grad_norm": 1.193837371345013, "learning_rate": 9.907543509689896e-06, "loss": 0.284, "step": 920 }, { "epoch": 0.08963503649635036, "grad_norm": 1.5855787651349198, "learning_rate": 9.907241562127752e-06, "loss": 0.4641, "step": 921 }, { "epoch": 0.0897323600973236, "grad_norm": 1.2401284480478103, "learning_rate": 9.906939126929998e-06, "loss": 0.246, "step": 922 }, { "epoch": 0.08982968369829683, "grad_norm": 1.503842201355298, "learning_rate": 9.906636204126685e-06, "loss": 0.4031, "step": 923 }, { "epoch": 0.08992700729927007, "grad_norm": 1.9138265658958267, "learning_rate": 9.906332793747917e-06, "loss": 0.587, "step": 924 }, { "epoch": 0.09002433090024331, "grad_norm": 1.5381184892388742, "learning_rate": 9.906028895823844e-06, "loss": 0.4119, "step": 925 }, { "epoch": 0.09012165450121655, "grad_norm": 1.5769181877690257, "learning_rate": 9.905724510384664e-06, "loss": 0.4071, "step": 926 }, { "epoch": 0.09021897810218978, "grad_norm": 1.4644408625641083, "learning_rate": 9.905419637460625e-06, "loss": 0.3656, "step": 927 }, { "epoch": 0.09031630170316302, "grad_norm": 2.043739071504731, "learning_rate": 9.90511427708202e-06, "loss": 0.6317, "step": 928 }, { "epoch": 0.09041362530413626, "grad_norm": 1.8397228419915481, "learning_rate": 9.904808429279195e-06, "loss": 0.6656, "step": 929 }, { "epoch": 0.0905109489051095, "grad_norm": 1.6689588837493128, "learning_rate": 9.904502094082542e-06, "loss": 0.4603, "step": 930 }, { "epoch": 0.09060827250608272, "grad_norm": 1.7157610479724803, "learning_rate": 9.9041952715225e-06, "loss": 0.3566, "step": 931 }, { "epoch": 0.09070559610705596, "grad_norm": 1.5797548847560638, "learning_rate": 9.90388796162956e-06, "loss": 0.527, "step": 932 }, { "epoch": 0.0908029197080292, "grad_norm": 1.3861944362556795, "learning_rate": 9.903580164434262e-06, "loss": 0.3555, "step": 933 }, { "epoch": 0.09090024330900243, "grad_norm": 1.4873043668950738, "learning_rate": 9.903271879967185e-06, "loss": 0.3606, "step": 934 }, { "epoch": 0.09099756690997567, "grad_norm": 1.5471770637050817, "learning_rate": 9.90296310825897e-06, "loss": 0.5407, "step": 935 }, { "epoch": 0.0910948905109489, "grad_norm": 1.7410898214633266, "learning_rate": 9.902653849340296e-06, "loss": 0.5604, "step": 936 }, { "epoch": 0.09119221411192215, "grad_norm": 1.490257412993615, "learning_rate": 9.902344103241897e-06, "loss": 0.4293, "step": 937 }, { "epoch": 0.09128953771289537, "grad_norm": 1.3076716120407041, "learning_rate": 9.90203386999455e-06, "loss": 0.4311, "step": 938 }, { "epoch": 0.09138686131386861, "grad_norm": 1.63883307554104, "learning_rate": 9.901723149629085e-06, "loss": 0.5026, "step": 939 }, { "epoch": 0.09148418491484185, "grad_norm": 1.460694807977355, "learning_rate": 9.901411942176377e-06, "loss": 0.4449, "step": 940 }, { "epoch": 0.09158150851581509, "grad_norm": 1.631318499416747, "learning_rate": 9.901100247667352e-06, "loss": 0.4762, "step": 941 }, { "epoch": 0.09167883211678832, "grad_norm": 1.472942456024595, "learning_rate": 9.900788066132982e-06, "loss": 0.4208, "step": 942 }, { "epoch": 0.09177615571776156, "grad_norm": 1.9471723252943203, "learning_rate": 9.900475397604292e-06, "loss": 0.4887, "step": 943 }, { "epoch": 0.0918734793187348, "grad_norm": 1.4192635165617975, "learning_rate": 9.900162242112348e-06, "loss": 0.4753, "step": 944 }, { "epoch": 0.09197080291970802, "grad_norm": 1.7864248496903834, "learning_rate": 9.89984859968827e-06, "loss": 0.6063, "step": 945 }, { "epoch": 0.09206812652068126, "grad_norm": 1.402919088092856, "learning_rate": 9.899534470363225e-06, "loss": 0.3561, "step": 946 }, { "epoch": 0.0921654501216545, "grad_norm": 1.15011785152118, "learning_rate": 9.89921985416843e-06, "loss": 0.2605, "step": 947 }, { "epoch": 0.09226277372262774, "grad_norm": 1.2940536511249239, "learning_rate": 9.898904751135145e-06, "loss": 0.2503, "step": 948 }, { "epoch": 0.09236009732360097, "grad_norm": 1.5093308152075566, "learning_rate": 9.898589161294684e-06, "loss": 0.4185, "step": 949 }, { "epoch": 0.09245742092457421, "grad_norm": 1.5826010349075055, "learning_rate": 9.898273084678406e-06, "loss": 0.536, "step": 950 }, { "epoch": 0.09255474452554745, "grad_norm": 1.5672518381317015, "learning_rate": 9.897956521317724e-06, "loss": 0.5068, "step": 951 }, { "epoch": 0.09265206812652069, "grad_norm": 1.784767292144658, "learning_rate": 9.89763947124409e-06, "loss": 0.6601, "step": 952 }, { "epoch": 0.09274939172749391, "grad_norm": 1.620681747107968, "learning_rate": 9.897321934489011e-06, "loss": 0.5402, "step": 953 }, { "epoch": 0.09284671532846715, "grad_norm": 1.7479722673062432, "learning_rate": 9.897003911084042e-06, "loss": 0.6593, "step": 954 }, { "epoch": 0.09294403892944039, "grad_norm": 1.6618363798373263, "learning_rate": 9.896685401060783e-06, "loss": 0.6086, "step": 955 }, { "epoch": 0.09304136253041363, "grad_norm": 1.3782603882872615, "learning_rate": 9.896366404450888e-06, "loss": 0.3431, "step": 956 }, { "epoch": 0.09313868613138686, "grad_norm": 1.6607836446620106, "learning_rate": 9.896046921286053e-06, "loss": 0.4015, "step": 957 }, { "epoch": 0.0932360097323601, "grad_norm": 1.372535143543006, "learning_rate": 9.895726951598026e-06, "loss": 0.3627, "step": 958 }, { "epoch": 0.09333333333333334, "grad_norm": 1.965175835699204, "learning_rate": 9.895406495418602e-06, "loss": 0.434, "step": 959 }, { "epoch": 0.09343065693430656, "grad_norm": 1.6072227382486934, "learning_rate": 9.895085552779626e-06, "loss": 0.3666, "step": 960 }, { "epoch": 0.0935279805352798, "grad_norm": 1.8680414138630521, "learning_rate": 9.894764123712991e-06, "loss": 0.6182, "step": 961 }, { "epoch": 0.09362530413625304, "grad_norm": 1.7249394724081422, "learning_rate": 9.894442208250636e-06, "loss": 0.569, "step": 962 }, { "epoch": 0.09372262773722628, "grad_norm": 1.7887658285510963, "learning_rate": 9.894119806424549e-06, "loss": 0.4825, "step": 963 }, { "epoch": 0.09381995133819951, "grad_norm": 1.4470695743772581, "learning_rate": 9.89379691826677e-06, "loss": 0.4036, "step": 964 }, { "epoch": 0.09391727493917275, "grad_norm": 1.739037372856574, "learning_rate": 9.893473543809383e-06, "loss": 0.3734, "step": 965 }, { "epoch": 0.09401459854014599, "grad_norm": 1.2401623802615098, "learning_rate": 9.893149683084522e-06, "loss": 0.2892, "step": 966 }, { "epoch": 0.09411192214111923, "grad_norm": 1.632367817316159, "learning_rate": 9.892825336124369e-06, "loss": 0.3324, "step": 967 }, { "epoch": 0.09420924574209245, "grad_norm": 1.4553279790204596, "learning_rate": 9.892500502961156e-06, "loss": 0.4518, "step": 968 }, { "epoch": 0.0943065693430657, "grad_norm": 2.0184949211791867, "learning_rate": 9.892175183627161e-06, "loss": 0.496, "step": 969 }, { "epoch": 0.09440389294403893, "grad_norm": 1.3847811204395728, "learning_rate": 9.89184937815471e-06, "loss": 0.3908, "step": 970 }, { "epoch": 0.09450121654501216, "grad_norm": 1.7325451795183482, "learning_rate": 9.89152308657618e-06, "loss": 0.5813, "step": 971 }, { "epoch": 0.0945985401459854, "grad_norm": 1.3485480854398895, "learning_rate": 9.891196308923994e-06, "loss": 0.2773, "step": 972 }, { "epoch": 0.09469586374695864, "grad_norm": 1.6137214411092917, "learning_rate": 9.890869045230625e-06, "loss": 0.573, "step": 973 }, { "epoch": 0.09479318734793188, "grad_norm": 1.8098732560393935, "learning_rate": 9.890541295528593e-06, "loss": 0.5765, "step": 974 }, { "epoch": 0.0948905109489051, "grad_norm": 1.7169741386061155, "learning_rate": 9.890213059850467e-06, "loss": 0.5463, "step": 975 }, { "epoch": 0.09498783454987834, "grad_norm": 1.6226425677233698, "learning_rate": 9.889884338228861e-06, "loss": 0.459, "step": 976 }, { "epoch": 0.09508515815085158, "grad_norm": 1.5712338302132318, "learning_rate": 9.889555130696445e-06, "loss": 0.2926, "step": 977 }, { "epoch": 0.09518248175182482, "grad_norm": 2.368668096329164, "learning_rate": 9.88922543728593e-06, "loss": 0.4602, "step": 978 }, { "epoch": 0.09527980535279805, "grad_norm": 1.5481463515619227, "learning_rate": 9.888895258030077e-06, "loss": 0.382, "step": 979 }, { "epoch": 0.09537712895377129, "grad_norm": 1.5566394762827083, "learning_rate": 9.888564592961698e-06, "loss": 0.4432, "step": 980 }, { "epoch": 0.09547445255474453, "grad_norm": 1.2929219586068095, "learning_rate": 9.888233442113651e-06, "loss": 0.2986, "step": 981 }, { "epoch": 0.09557177615571776, "grad_norm": 1.7926346211976876, "learning_rate": 9.887901805518841e-06, "loss": 0.4536, "step": 982 }, { "epoch": 0.095669099756691, "grad_norm": 1.5810862037952855, "learning_rate": 9.887569683210225e-06, "loss": 0.5143, "step": 983 }, { "epoch": 0.09576642335766423, "grad_norm": 1.486412737689962, "learning_rate": 9.887237075220805e-06, "loss": 0.4422, "step": 984 }, { "epoch": 0.09586374695863747, "grad_norm": 1.5634292890846626, "learning_rate": 9.886903981583633e-06, "loss": 0.5158, "step": 985 }, { "epoch": 0.0959610705596107, "grad_norm": 1.4911106877832496, "learning_rate": 9.88657040233181e-06, "loss": 0.3584, "step": 986 }, { "epoch": 0.09605839416058394, "grad_norm": 1.8920202230134835, "learning_rate": 9.886236337498481e-06, "loss": 0.7059, "step": 987 }, { "epoch": 0.09615571776155718, "grad_norm": 1.9765830057761664, "learning_rate": 9.885901787116844e-06, "loss": 0.3363, "step": 988 }, { "epoch": 0.09625304136253042, "grad_norm": 1.7412713212065478, "learning_rate": 9.885566751220144e-06, "loss": 0.6238, "step": 989 }, { "epoch": 0.09635036496350365, "grad_norm": 1.4558500764026314, "learning_rate": 9.885231229841675e-06, "loss": 0.5033, "step": 990 }, { "epoch": 0.09644768856447689, "grad_norm": 1.5722863237428275, "learning_rate": 9.884895223014772e-06, "loss": 0.3026, "step": 991 }, { "epoch": 0.09654501216545013, "grad_norm": 1.7850396516814273, "learning_rate": 9.88455873077283e-06, "loss": 0.6797, "step": 992 }, { "epoch": 0.09664233576642335, "grad_norm": 1.5907642595826164, "learning_rate": 9.884221753149286e-06, "loss": 0.5051, "step": 993 }, { "epoch": 0.09673965936739659, "grad_norm": 1.383326117178851, "learning_rate": 9.883884290177623e-06, "loss": 0.394, "step": 994 }, { "epoch": 0.09683698296836983, "grad_norm": 1.5330791836349085, "learning_rate": 9.883546341891375e-06, "loss": 0.4531, "step": 995 }, { "epoch": 0.09693430656934307, "grad_norm": 1.3858453283442664, "learning_rate": 9.883207908324126e-06, "loss": 0.4674, "step": 996 }, { "epoch": 0.0970316301703163, "grad_norm": 1.2633519423598012, "learning_rate": 9.882868989509507e-06, "loss": 0.3053, "step": 997 }, { "epoch": 0.09712895377128954, "grad_norm": 1.5725755469000553, "learning_rate": 9.882529585481194e-06, "loss": 0.5382, "step": 998 }, { "epoch": 0.09722627737226278, "grad_norm": 1.594807816051373, "learning_rate": 9.882189696272916e-06, "loss": 0.5027, "step": 999 }, { "epoch": 0.09732360097323602, "grad_norm": 1.7855937930735857, "learning_rate": 9.881849321918446e-06, "loss": 0.6336, "step": 1000 }, { "epoch": 0.09742092457420924, "grad_norm": 1.8161736452208326, "learning_rate": 9.88150846245161e-06, "loss": 0.5432, "step": 1001 }, { "epoch": 0.09751824817518248, "grad_norm": 1.2323791206307224, "learning_rate": 9.881167117906276e-06, "loss": 0.3361, "step": 1002 }, { "epoch": 0.09761557177615572, "grad_norm": 1.6720448345305876, "learning_rate": 9.880825288316367e-06, "loss": 0.3583, "step": 1003 }, { "epoch": 0.09771289537712895, "grad_norm": 1.408364549926656, "learning_rate": 9.880482973715846e-06, "loss": 0.3847, "step": 1004 }, { "epoch": 0.09781021897810219, "grad_norm": 1.493256031544701, "learning_rate": 9.880140174138735e-06, "loss": 0.3611, "step": 1005 }, { "epoch": 0.09790754257907543, "grad_norm": 1.3658283125944337, "learning_rate": 9.879796889619093e-06, "loss": 0.3555, "step": 1006 }, { "epoch": 0.09800486618004867, "grad_norm": 1.7346143127846696, "learning_rate": 9.879453120191037e-06, "loss": 0.5028, "step": 1007 }, { "epoch": 0.09810218978102189, "grad_norm": 1.9094090784905724, "learning_rate": 9.879108865888724e-06, "loss": 0.4799, "step": 1008 }, { "epoch": 0.09819951338199513, "grad_norm": 1.1235415223499565, "learning_rate": 9.878764126746364e-06, "loss": 0.2181, "step": 1009 }, { "epoch": 0.09829683698296837, "grad_norm": 1.494557121918356, "learning_rate": 9.878418902798215e-06, "loss": 0.4548, "step": 1010 }, { "epoch": 0.09839416058394161, "grad_norm": 1.5340021274706077, "learning_rate": 9.87807319407858e-06, "loss": 0.4952, "step": 1011 }, { "epoch": 0.09849148418491484, "grad_norm": 1.2523545024978981, "learning_rate": 9.877727000621815e-06, "loss": 0.2887, "step": 1012 }, { "epoch": 0.09858880778588808, "grad_norm": 1.424446798325285, "learning_rate": 9.877380322462317e-06, "loss": 0.3628, "step": 1013 }, { "epoch": 0.09868613138686132, "grad_norm": 1.6382574528105933, "learning_rate": 9.877033159634542e-06, "loss": 0.5396, "step": 1014 }, { "epoch": 0.09878345498783454, "grad_norm": 1.544256440771578, "learning_rate": 9.876685512172982e-06, "loss": 0.4031, "step": 1015 }, { "epoch": 0.09888077858880778, "grad_norm": 1.620162733287423, "learning_rate": 9.876337380112185e-06, "loss": 0.4925, "step": 1016 }, { "epoch": 0.09897810218978102, "grad_norm": 1.6140460771461889, "learning_rate": 9.875988763486746e-06, "loss": 0.5549, "step": 1017 }, { "epoch": 0.09907542579075426, "grad_norm": 1.6187864498320685, "learning_rate": 9.875639662331307e-06, "loss": 0.5034, "step": 1018 }, { "epoch": 0.09917274939172749, "grad_norm": 1.249422512171971, "learning_rate": 9.875290076680557e-06, "loss": 0.236, "step": 1019 }, { "epoch": 0.09927007299270073, "grad_norm": 1.5835572971087337, "learning_rate": 9.874940006569236e-06, "loss": 0.5309, "step": 1020 }, { "epoch": 0.09936739659367397, "grad_norm": 0.8658795502351594, "learning_rate": 9.874589452032131e-06, "loss": 0.1911, "step": 1021 }, { "epoch": 0.09946472019464721, "grad_norm": 1.3171385587421753, "learning_rate": 9.874238413104076e-06, "loss": 0.3486, "step": 1022 }, { "epoch": 0.09956204379562043, "grad_norm": 1.4498439375980756, "learning_rate": 9.873886889819953e-06, "loss": 0.1986, "step": 1023 }, { "epoch": 0.09965936739659367, "grad_norm": 1.5991307847988792, "learning_rate": 9.873534882214692e-06, "loss": 0.6397, "step": 1024 }, { "epoch": 0.09975669099756691, "grad_norm": 1.6135151765084201, "learning_rate": 9.873182390323277e-06, "loss": 0.4338, "step": 1025 }, { "epoch": 0.09985401459854015, "grad_norm": 1.465261170994732, "learning_rate": 9.872829414180733e-06, "loss": 0.4692, "step": 1026 }, { "epoch": 0.09995133819951338, "grad_norm": 1.6964068418559575, "learning_rate": 9.872475953822134e-06, "loss": 0.4763, "step": 1027 }, { "epoch": 0.10004866180048662, "grad_norm": 1.5209137969308788, "learning_rate": 9.872122009282604e-06, "loss": 0.4266, "step": 1028 }, { "epoch": 0.10014598540145986, "grad_norm": 1.4495568716439686, "learning_rate": 9.871767580597316e-06, "loss": 0.4087, "step": 1029 }, { "epoch": 0.10024330900243308, "grad_norm": 1.344434785457905, "learning_rate": 9.871412667801488e-06, "loss": 0.3797, "step": 1030 }, { "epoch": 0.10034063260340632, "grad_norm": 1.5794908259633444, "learning_rate": 9.871057270930392e-06, "loss": 0.3939, "step": 1031 }, { "epoch": 0.10043795620437956, "grad_norm": 1.5876979734473795, "learning_rate": 9.870701390019337e-06, "loss": 0.484, "step": 1032 }, { "epoch": 0.1005352798053528, "grad_norm": 1.8773231101994967, "learning_rate": 9.870345025103694e-06, "loss": 0.5893, "step": 1033 }, { "epoch": 0.10063260340632603, "grad_norm": 1.4927383125242464, "learning_rate": 9.869988176218871e-06, "loss": 0.4138, "step": 1034 }, { "epoch": 0.10072992700729927, "grad_norm": 1.4766306382054422, "learning_rate": 9.869630843400331e-06, "loss": 0.4125, "step": 1035 }, { "epoch": 0.10082725060827251, "grad_norm": 2.1872385141217388, "learning_rate": 9.86927302668358e-06, "loss": 0.4581, "step": 1036 }, { "epoch": 0.10092457420924575, "grad_norm": 1.4275090865666056, "learning_rate": 9.868914726104174e-06, "loss": 0.2393, "step": 1037 }, { "epoch": 0.10102189781021897, "grad_norm": 1.6989614006808447, "learning_rate": 9.868555941697721e-06, "loss": 0.4941, "step": 1038 }, { "epoch": 0.10111922141119221, "grad_norm": 1.4357333730365565, "learning_rate": 9.86819667349987e-06, "loss": 0.4907, "step": 1039 }, { "epoch": 0.10121654501216545, "grad_norm": 2.0026376735495055, "learning_rate": 9.867836921546326e-06, "loss": 0.8695, "step": 1040 }, { "epoch": 0.10131386861313868, "grad_norm": 1.6951372609783342, "learning_rate": 9.867476685872833e-06, "loss": 0.6236, "step": 1041 }, { "epoch": 0.10141119221411192, "grad_norm": 1.6963236381946833, "learning_rate": 9.86711596651519e-06, "loss": 0.6358, "step": 1042 }, { "epoch": 0.10150851581508516, "grad_norm": 1.5189733584329748, "learning_rate": 9.866754763509242e-06, "loss": 0.4374, "step": 1043 }, { "epoch": 0.1016058394160584, "grad_norm": 1.2748045341406278, "learning_rate": 9.866393076890881e-06, "loss": 0.4213, "step": 1044 }, { "epoch": 0.10170316301703163, "grad_norm": 1.7405552081322075, "learning_rate": 9.866030906696051e-06, "loss": 0.6708, "step": 1045 }, { "epoch": 0.10180048661800487, "grad_norm": 1.3495682131815454, "learning_rate": 9.865668252960737e-06, "loss": 0.3531, "step": 1046 }, { "epoch": 0.1018978102189781, "grad_norm": 1.5653185028552046, "learning_rate": 9.86530511572098e-06, "loss": 0.4331, "step": 1047 }, { "epoch": 0.10199513381995134, "grad_norm": 1.3992858529840162, "learning_rate": 9.864941495012861e-06, "loss": 0.3388, "step": 1048 }, { "epoch": 0.10209245742092457, "grad_norm": 1.6270586325333123, "learning_rate": 9.864577390872516e-06, "loss": 0.4234, "step": 1049 }, { "epoch": 0.10218978102189781, "grad_norm": 1.8656971621974168, "learning_rate": 9.864212803336126e-06, "loss": 0.718, "step": 1050 }, { "epoch": 0.10228710462287105, "grad_norm": 1.4029758909387644, "learning_rate": 9.86384773243992e-06, "loss": 0.3892, "step": 1051 }, { "epoch": 0.10238442822384428, "grad_norm": 1.1023559958942302, "learning_rate": 9.863482178220176e-06, "loss": 0.2453, "step": 1052 }, { "epoch": 0.10248175182481752, "grad_norm": 1.5775869982106272, "learning_rate": 9.863116140713219e-06, "loss": 0.5324, "step": 1053 }, { "epoch": 0.10257907542579076, "grad_norm": 1.603675899324949, "learning_rate": 9.86274961995542e-06, "loss": 0.4521, "step": 1054 }, { "epoch": 0.102676399026764, "grad_norm": 1.6020699046167006, "learning_rate": 9.862382615983203e-06, "loss": 0.4545, "step": 1055 }, { "epoch": 0.10277372262773722, "grad_norm": 1.474718021659803, "learning_rate": 9.862015128833036e-06, "loss": 0.4822, "step": 1056 }, { "epoch": 0.10287104622871046, "grad_norm": 1.6033514684549, "learning_rate": 9.861647158541438e-06, "loss": 0.5069, "step": 1057 }, { "epoch": 0.1029683698296837, "grad_norm": 1.4841655382640788, "learning_rate": 9.861278705144974e-06, "loss": 0.3865, "step": 1058 }, { "epoch": 0.10306569343065694, "grad_norm": 1.1425556408878823, "learning_rate": 9.860909768680259e-06, "loss": 0.2443, "step": 1059 }, { "epoch": 0.10316301703163017, "grad_norm": 1.5288676978753954, "learning_rate": 9.86054034918395e-06, "loss": 0.3652, "step": 1060 }, { "epoch": 0.1032603406326034, "grad_norm": 1.5264484093473076, "learning_rate": 9.860170446692758e-06, "loss": 0.3318, "step": 1061 }, { "epoch": 0.10335766423357665, "grad_norm": 1.4476258605632986, "learning_rate": 9.859800061243443e-06, "loss": 0.4518, "step": 1062 }, { "epoch": 0.10345498783454987, "grad_norm": 1.336933590040686, "learning_rate": 9.859429192872809e-06, "loss": 0.2652, "step": 1063 }, { "epoch": 0.10355231143552311, "grad_norm": 1.6050187197155075, "learning_rate": 9.859057841617709e-06, "loss": 0.5383, "step": 1064 }, { "epoch": 0.10364963503649635, "grad_norm": 1.3472405276196469, "learning_rate": 9.858686007515045e-06, "loss": 0.4483, "step": 1065 }, { "epoch": 0.10374695863746959, "grad_norm": 1.4838970820374793, "learning_rate": 9.858313690601767e-06, "loss": 0.3506, "step": 1066 }, { "epoch": 0.10384428223844282, "grad_norm": 1.5911831099601979, "learning_rate": 9.857940890914868e-06, "loss": 0.3995, "step": 1067 }, { "epoch": 0.10394160583941606, "grad_norm": 1.415577451063168, "learning_rate": 9.8575676084914e-06, "loss": 0.4773, "step": 1068 }, { "epoch": 0.1040389294403893, "grad_norm": 1.7250253730787564, "learning_rate": 9.857193843368451e-06, "loss": 0.4456, "step": 1069 }, { "epoch": 0.10413625304136254, "grad_norm": 1.5066269873708278, "learning_rate": 9.856819595583166e-06, "loss": 0.5481, "step": 1070 }, { "epoch": 0.10423357664233576, "grad_norm": 1.5626665408071483, "learning_rate": 9.856444865172732e-06, "loss": 0.5382, "step": 1071 }, { "epoch": 0.104330900243309, "grad_norm": 1.9089561390061884, "learning_rate": 9.856069652174385e-06, "loss": 0.5533, "step": 1072 }, { "epoch": 0.10442822384428224, "grad_norm": 1.2757688373398666, "learning_rate": 9.855693956625414e-06, "loss": 0.3065, "step": 1073 }, { "epoch": 0.10452554744525547, "grad_norm": 1.7230598513214688, "learning_rate": 9.85531777856315e-06, "loss": 0.5367, "step": 1074 }, { "epoch": 0.10462287104622871, "grad_norm": 1.8368494244508635, "learning_rate": 9.854941118024973e-06, "loss": 0.4587, "step": 1075 }, { "epoch": 0.10472019464720195, "grad_norm": 1.418583003899538, "learning_rate": 9.854563975048314e-06, "loss": 0.405, "step": 1076 }, { "epoch": 0.10481751824817519, "grad_norm": 1.555078045275604, "learning_rate": 9.854186349670648e-06, "loss": 0.5572, "step": 1077 }, { "epoch": 0.10491484184914841, "grad_norm": 1.5414220083120458, "learning_rate": 9.853808241929502e-06, "loss": 0.3382, "step": 1078 }, { "epoch": 0.10501216545012165, "grad_norm": 1.2895897451723073, "learning_rate": 9.853429651862445e-06, "loss": 0.4342, "step": 1079 }, { "epoch": 0.10510948905109489, "grad_norm": 1.3117010773132232, "learning_rate": 9.853050579507104e-06, "loss": 0.3751, "step": 1080 }, { "epoch": 0.10520681265206813, "grad_norm": 1.5440994948167002, "learning_rate": 9.852671024901141e-06, "loss": 0.4971, "step": 1081 }, { "epoch": 0.10530413625304136, "grad_norm": 1.2028388141262132, "learning_rate": 9.852290988082278e-06, "loss": 0.3933, "step": 1082 }, { "epoch": 0.1054014598540146, "grad_norm": 1.6199890049219825, "learning_rate": 9.851910469088275e-06, "loss": 0.5394, "step": 1083 }, { "epoch": 0.10549878345498784, "grad_norm": 1.4805170620003079, "learning_rate": 9.851529467956946e-06, "loss": 0.2421, "step": 1084 }, { "epoch": 0.10559610705596106, "grad_norm": 1.432802486072686, "learning_rate": 9.851147984726154e-06, "loss": 0.479, "step": 1085 }, { "epoch": 0.1056934306569343, "grad_norm": 1.7662999036343905, "learning_rate": 9.850766019433803e-06, "loss": 0.706, "step": 1086 }, { "epoch": 0.10579075425790754, "grad_norm": 1.9136497208168854, "learning_rate": 9.850383572117853e-06, "loss": 0.7672, "step": 1087 }, { "epoch": 0.10588807785888078, "grad_norm": 1.1667281997438979, "learning_rate": 9.850000642816306e-06, "loss": 0.2263, "step": 1088 }, { "epoch": 0.10598540145985401, "grad_norm": 1.3133144576431575, "learning_rate": 9.849617231567213e-06, "loss": 0.2211, "step": 1089 }, { "epoch": 0.10608272506082725, "grad_norm": 1.411642205718121, "learning_rate": 9.849233338408674e-06, "loss": 0.4379, "step": 1090 }, { "epoch": 0.10618004866180049, "grad_norm": 1.7114110143353651, "learning_rate": 9.84884896337884e-06, "loss": 0.462, "step": 1091 }, { "epoch": 0.10627737226277373, "grad_norm": 1.4035875335457177, "learning_rate": 9.848464106515903e-06, "loss": 0.317, "step": 1092 }, { "epoch": 0.10637469586374695, "grad_norm": 1.5988244446936477, "learning_rate": 9.848078767858107e-06, "loss": 0.5254, "step": 1093 }, { "epoch": 0.1064720194647202, "grad_norm": 1.6336010940510732, "learning_rate": 9.847692947443745e-06, "loss": 0.4979, "step": 1094 }, { "epoch": 0.10656934306569343, "grad_norm": 1.68747146017171, "learning_rate": 9.847306645311154e-06, "loss": 0.5515, "step": 1095 }, { "epoch": 0.10666666666666667, "grad_norm": 1.497709273552353, "learning_rate": 9.846919861498724e-06, "loss": 0.4221, "step": 1096 }, { "epoch": 0.1067639902676399, "grad_norm": 1.4761873606313476, "learning_rate": 9.846532596044887e-06, "loss": 0.4296, "step": 1097 }, { "epoch": 0.10686131386861314, "grad_norm": 1.1441862868877024, "learning_rate": 9.846144848988127e-06, "loss": 0.2816, "step": 1098 }, { "epoch": 0.10695863746958638, "grad_norm": 1.7272604657837642, "learning_rate": 9.845756620366976e-06, "loss": 0.5916, "step": 1099 }, { "epoch": 0.1070559610705596, "grad_norm": 1.3799505412872324, "learning_rate": 9.84536791022001e-06, "loss": 0.3947, "step": 1100 }, { "epoch": 0.10715328467153284, "grad_norm": 1.6943818099878132, "learning_rate": 9.844978718585855e-06, "loss": 0.4737, "step": 1101 }, { "epoch": 0.10725060827250608, "grad_norm": 1.5405614688920448, "learning_rate": 9.84458904550319e-06, "loss": 0.4152, "step": 1102 }, { "epoch": 0.10734793187347932, "grad_norm": 1.6335292867295117, "learning_rate": 9.844198891010733e-06, "loss": 0.5677, "step": 1103 }, { "epoch": 0.10744525547445255, "grad_norm": 1.302603147379972, "learning_rate": 9.843808255147253e-06, "loss": 0.4283, "step": 1104 }, { "epoch": 0.10754257907542579, "grad_norm": 1.7967506033919078, "learning_rate": 9.84341713795157e-06, "loss": 0.6995, "step": 1105 }, { "epoch": 0.10763990267639903, "grad_norm": 1.7320527346822367, "learning_rate": 9.84302553946255e-06, "loss": 0.5369, "step": 1106 }, { "epoch": 0.10773722627737227, "grad_norm": 1.2124746103676287, "learning_rate": 9.842633459719104e-06, "loss": 0.296, "step": 1107 }, { "epoch": 0.1078345498783455, "grad_norm": 1.6638227119864655, "learning_rate": 9.842240898760195e-06, "loss": 0.5632, "step": 1108 }, { "epoch": 0.10793187347931874, "grad_norm": 1.5728826792836543, "learning_rate": 9.841847856624833e-06, "loss": 0.3407, "step": 1109 }, { "epoch": 0.10802919708029197, "grad_norm": 1.4855225795030034, "learning_rate": 9.841454333352073e-06, "loss": 0.534, "step": 1110 }, { "epoch": 0.1081265206812652, "grad_norm": 1.741747608159628, "learning_rate": 9.841060328981019e-06, "loss": 0.5739, "step": 1111 }, { "epoch": 0.10822384428223844, "grad_norm": 1.2765533148109443, "learning_rate": 9.840665843550825e-06, "loss": 0.335, "step": 1112 }, { "epoch": 0.10832116788321168, "grad_norm": 1.9391527817309226, "learning_rate": 9.840270877100692e-06, "loss": 0.5604, "step": 1113 }, { "epoch": 0.10841849148418492, "grad_norm": 1.2570937099076989, "learning_rate": 9.839875429669865e-06, "loss": 0.3098, "step": 1114 }, { "epoch": 0.10851581508515815, "grad_norm": 1.6345857910998665, "learning_rate": 9.839479501297643e-06, "loss": 0.4665, "step": 1115 }, { "epoch": 0.10861313868613139, "grad_norm": 2.1039943309751075, "learning_rate": 9.839083092023368e-06, "loss": 0.8597, "step": 1116 }, { "epoch": 0.10871046228710463, "grad_norm": 1.634678554608885, "learning_rate": 9.838686201886432e-06, "loss": 0.4907, "step": 1117 }, { "epoch": 0.10880778588807787, "grad_norm": 1.328229383966676, "learning_rate": 9.838288830926274e-06, "loss": 0.3255, "step": 1118 }, { "epoch": 0.10890510948905109, "grad_norm": 1.3587359099021656, "learning_rate": 9.837890979182381e-06, "loss": 0.4224, "step": 1119 }, { "epoch": 0.10900243309002433, "grad_norm": 1.6242900911620413, "learning_rate": 9.837492646694287e-06, "loss": 0.4338, "step": 1120 }, { "epoch": 0.10909975669099757, "grad_norm": 1.5901048900387273, "learning_rate": 9.837093833501576e-06, "loss": 0.5168, "step": 1121 }, { "epoch": 0.1091970802919708, "grad_norm": 1.34172908606168, "learning_rate": 9.836694539643878e-06, "loss": 0.3233, "step": 1122 }, { "epoch": 0.10929440389294404, "grad_norm": 1.4724714330159256, "learning_rate": 9.83629476516087e-06, "loss": 0.3652, "step": 1123 }, { "epoch": 0.10939172749391728, "grad_norm": 1.4884050773310515, "learning_rate": 9.835894510092279e-06, "loss": 0.4622, "step": 1124 }, { "epoch": 0.10948905109489052, "grad_norm": 1.3181328020728609, "learning_rate": 9.835493774477877e-06, "loss": 0.4531, "step": 1125 }, { "epoch": 0.10958637469586374, "grad_norm": 1.5414298966880746, "learning_rate": 9.835092558357488e-06, "loss": 0.3659, "step": 1126 }, { "epoch": 0.10968369829683698, "grad_norm": 1.3248299507567909, "learning_rate": 9.834690861770979e-06, "loss": 0.3207, "step": 1127 }, { "epoch": 0.10978102189781022, "grad_norm": 1.5527535683267375, "learning_rate": 9.834288684758269e-06, "loss": 0.4938, "step": 1128 }, { "epoch": 0.10987834549878346, "grad_norm": 1.3342131255187983, "learning_rate": 9.83388602735932e-06, "loss": 0.4451, "step": 1129 }, { "epoch": 0.10997566909975669, "grad_norm": 1.0500905202266426, "learning_rate": 9.833482889614143e-06, "loss": 0.2408, "step": 1130 }, { "epoch": 0.11007299270072993, "grad_norm": 1.377353907486564, "learning_rate": 9.833079271562802e-06, "loss": 0.3945, "step": 1131 }, { "epoch": 0.11017031630170317, "grad_norm": 1.5823324787969848, "learning_rate": 9.832675173245404e-06, "loss": 0.6066, "step": 1132 }, { "epoch": 0.11026763990267639, "grad_norm": 1.7266167679625446, "learning_rate": 9.832270594702102e-06, "loss": 0.6417, "step": 1133 }, { "epoch": 0.11036496350364963, "grad_norm": 1.4091165783577269, "learning_rate": 9.831865535973103e-06, "loss": 0.2661, "step": 1134 }, { "epoch": 0.11046228710462287, "grad_norm": 0.9959339686876645, "learning_rate": 9.831459997098654e-06, "loss": 0.1744, "step": 1135 }, { "epoch": 0.11055961070559611, "grad_norm": 1.4748243970921762, "learning_rate": 9.831053978119056e-06, "loss": 0.4011, "step": 1136 }, { "epoch": 0.11065693430656934, "grad_norm": 1.5879686249629044, "learning_rate": 9.830647479074656e-06, "loss": 0.3021, "step": 1137 }, { "epoch": 0.11075425790754258, "grad_norm": 1.5057704716227702, "learning_rate": 9.830240500005845e-06, "loss": 0.2962, "step": 1138 }, { "epoch": 0.11085158150851582, "grad_norm": 1.7497051535586357, "learning_rate": 9.829833040953068e-06, "loss": 0.4717, "step": 1139 }, { "epoch": 0.11094890510948906, "grad_norm": 1.7819946472609902, "learning_rate": 9.829425101956812e-06, "loss": 0.6113, "step": 1140 }, { "epoch": 0.11104622871046228, "grad_norm": 1.7680522472506797, "learning_rate": 9.829016683057615e-06, "loss": 0.4672, "step": 1141 }, { "epoch": 0.11114355231143552, "grad_norm": 1.8291787265156998, "learning_rate": 9.828607784296063e-06, "loss": 0.5148, "step": 1142 }, { "epoch": 0.11124087591240876, "grad_norm": 1.4119536127948566, "learning_rate": 9.828198405712788e-06, "loss": 0.2698, "step": 1143 }, { "epoch": 0.11133819951338199, "grad_norm": 1.67600232780131, "learning_rate": 9.827788547348469e-06, "loss": 0.4912, "step": 1144 }, { "epoch": 0.11143552311435523, "grad_norm": 1.9367616538665617, "learning_rate": 9.827378209243835e-06, "loss": 0.3781, "step": 1145 }, { "epoch": 0.11153284671532847, "grad_norm": 1.7032208896905794, "learning_rate": 9.826967391439662e-06, "loss": 0.5816, "step": 1146 }, { "epoch": 0.11163017031630171, "grad_norm": 1.60872896431165, "learning_rate": 9.826556093976769e-06, "loss": 0.4654, "step": 1147 }, { "epoch": 0.11172749391727493, "grad_norm": 1.5752275514466696, "learning_rate": 9.826144316896033e-06, "loss": 0.3177, "step": 1148 }, { "epoch": 0.11182481751824817, "grad_norm": 1.8207599924827627, "learning_rate": 9.82573206023837e-06, "loss": 0.5701, "step": 1149 }, { "epoch": 0.11192214111922141, "grad_norm": 1.5850279506541385, "learning_rate": 9.825319324044745e-06, "loss": 0.5616, "step": 1150 }, { "epoch": 0.11201946472019465, "grad_norm": 1.360496233978723, "learning_rate": 9.824906108356174e-06, "loss": 0.3407, "step": 1151 }, { "epoch": 0.11211678832116788, "grad_norm": 1.6595565610362801, "learning_rate": 9.824492413213717e-06, "loss": 0.6641, "step": 1152 }, { "epoch": 0.11221411192214112, "grad_norm": 1.6031792644515102, "learning_rate": 9.824078238658483e-06, "loss": 0.4779, "step": 1153 }, { "epoch": 0.11231143552311436, "grad_norm": 1.0762751645680708, "learning_rate": 9.82366358473163e-06, "loss": 0.2739, "step": 1154 }, { "epoch": 0.11240875912408758, "grad_norm": 1.3660129842713564, "learning_rate": 9.82324845147436e-06, "loss": 0.5043, "step": 1155 }, { "epoch": 0.11250608272506082, "grad_norm": 1.6273408315616833, "learning_rate": 9.822832838927929e-06, "loss": 0.6159, "step": 1156 }, { "epoch": 0.11260340632603406, "grad_norm": 1.4216921342906768, "learning_rate": 9.822416747133634e-06, "loss": 0.4093, "step": 1157 }, { "epoch": 0.1127007299270073, "grad_norm": 1.8899721642114575, "learning_rate": 9.822000176132822e-06, "loss": 0.5586, "step": 1158 }, { "epoch": 0.11279805352798053, "grad_norm": 1.5144459966059345, "learning_rate": 9.821583125966889e-06, "loss": 0.3806, "step": 1159 }, { "epoch": 0.11289537712895377, "grad_norm": 1.61041803725934, "learning_rate": 9.821165596677278e-06, "loss": 0.4064, "step": 1160 }, { "epoch": 0.11299270072992701, "grad_norm": 1.5410637406837986, "learning_rate": 9.820747588305477e-06, "loss": 0.3526, "step": 1161 }, { "epoch": 0.11309002433090025, "grad_norm": 1.5545393523360629, "learning_rate": 9.820329100893026e-06, "loss": 0.3834, "step": 1162 }, { "epoch": 0.11318734793187347, "grad_norm": 1.6391567381322345, "learning_rate": 9.819910134481508e-06, "loss": 0.3849, "step": 1163 }, { "epoch": 0.11328467153284671, "grad_norm": 1.5204183543600032, "learning_rate": 9.819490689112559e-06, "loss": 0.4712, "step": 1164 }, { "epoch": 0.11338199513381995, "grad_norm": 1.5168954302933022, "learning_rate": 9.819070764827858e-06, "loss": 0.4662, "step": 1165 }, { "epoch": 0.1134793187347932, "grad_norm": 1.4412304117107342, "learning_rate": 9.818650361669133e-06, "loss": 0.3515, "step": 1166 }, { "epoch": 0.11357664233576642, "grad_norm": 1.5419710047923603, "learning_rate": 9.81822947967816e-06, "loss": 0.383, "step": 1167 }, { "epoch": 0.11367396593673966, "grad_norm": 1.59211707141906, "learning_rate": 9.817808118896759e-06, "loss": 0.5101, "step": 1168 }, { "epoch": 0.1137712895377129, "grad_norm": 1.9315831066859817, "learning_rate": 9.817386279366808e-06, "loss": 0.6179, "step": 1169 }, { "epoch": 0.11386861313868613, "grad_norm": 1.3153157684002792, "learning_rate": 9.816963961130218e-06, "loss": 0.2382, "step": 1170 }, { "epoch": 0.11396593673965937, "grad_norm": 1.3579619945410324, "learning_rate": 9.81654116422896e-06, "loss": 0.4424, "step": 1171 }, { "epoch": 0.1140632603406326, "grad_norm": 1.479330223962703, "learning_rate": 9.816117888705046e-06, "loss": 0.3647, "step": 1172 }, { "epoch": 0.11416058394160584, "grad_norm": 1.5031676224913018, "learning_rate": 9.815694134600537e-06, "loss": 0.3686, "step": 1173 }, { "epoch": 0.11425790754257907, "grad_norm": 1.6106095254885215, "learning_rate": 9.815269901957543e-06, "loss": 0.5309, "step": 1174 }, { "epoch": 0.11435523114355231, "grad_norm": 1.4367590943688036, "learning_rate": 9.814845190818218e-06, "loss": 0.3786, "step": 1175 }, { "epoch": 0.11445255474452555, "grad_norm": 2.0513510648109636, "learning_rate": 9.814420001224767e-06, "loss": 0.8885, "step": 1176 }, { "epoch": 0.11454987834549879, "grad_norm": 1.3799990465326748, "learning_rate": 9.813994333219443e-06, "loss": 0.3511, "step": 1177 }, { "epoch": 0.11464720194647202, "grad_norm": 1.2354207015762353, "learning_rate": 9.813568186844541e-06, "loss": 0.3571, "step": 1178 }, { "epoch": 0.11474452554744526, "grad_norm": 2.0501383618438678, "learning_rate": 9.813141562142409e-06, "loss": 0.4485, "step": 1179 }, { "epoch": 0.1148418491484185, "grad_norm": 1.351584991091541, "learning_rate": 9.812714459155444e-06, "loss": 0.2894, "step": 1180 }, { "epoch": 0.11493917274939172, "grad_norm": 1.3568994189032655, "learning_rate": 9.812286877926085e-06, "loss": 0.4016, "step": 1181 }, { "epoch": 0.11503649635036496, "grad_norm": 1.4949546840268106, "learning_rate": 9.81185881849682e-06, "loss": 0.527, "step": 1182 }, { "epoch": 0.1151338199513382, "grad_norm": 1.5053242129518953, "learning_rate": 9.811430280910186e-06, "loss": 0.4324, "step": 1183 }, { "epoch": 0.11523114355231144, "grad_norm": 1.2995408017430223, "learning_rate": 9.811001265208768e-06, "loss": 0.4592, "step": 1184 }, { "epoch": 0.11532846715328467, "grad_norm": 1.4103061247668216, "learning_rate": 9.810571771435197e-06, "loss": 0.4615, "step": 1185 }, { "epoch": 0.1154257907542579, "grad_norm": 1.3694132099540144, "learning_rate": 9.810141799632153e-06, "loss": 0.4224, "step": 1186 }, { "epoch": 0.11552311435523115, "grad_norm": 1.4494836775882813, "learning_rate": 9.809711349842363e-06, "loss": 0.4189, "step": 1187 }, { "epoch": 0.11562043795620439, "grad_norm": 1.5100099037805617, "learning_rate": 9.809280422108598e-06, "loss": 0.495, "step": 1188 }, { "epoch": 0.11571776155717761, "grad_norm": 1.449093301695385, "learning_rate": 9.808849016473682e-06, "loss": 0.345, "step": 1189 }, { "epoch": 0.11581508515815085, "grad_norm": 1.501093862959825, "learning_rate": 9.808417132980484e-06, "loss": 0.4624, "step": 1190 }, { "epoch": 0.11591240875912409, "grad_norm": 1.4567657310588336, "learning_rate": 9.807984771671919e-06, "loss": 0.2836, "step": 1191 }, { "epoch": 0.11600973236009732, "grad_norm": 1.6666134190000732, "learning_rate": 9.807551932590952e-06, "loss": 0.3341, "step": 1192 }, { "epoch": 0.11610705596107056, "grad_norm": 1.7534770482902293, "learning_rate": 9.807118615780595e-06, "loss": 0.6021, "step": 1193 }, { "epoch": 0.1162043795620438, "grad_norm": 1.744738707996039, "learning_rate": 9.806684821283908e-06, "loss": 0.4593, "step": 1194 }, { "epoch": 0.11630170316301704, "grad_norm": 1.7519974888996959, "learning_rate": 9.806250549143994e-06, "loss": 0.5433, "step": 1195 }, { "epoch": 0.11639902676399026, "grad_norm": 1.6094009249182397, "learning_rate": 9.805815799404008e-06, "loss": 0.6053, "step": 1196 }, { "epoch": 0.1164963503649635, "grad_norm": 1.4291146386614342, "learning_rate": 9.805380572107153e-06, "loss": 0.4377, "step": 1197 }, { "epoch": 0.11659367396593674, "grad_norm": 1.6092739629047335, "learning_rate": 9.804944867296678e-06, "loss": 0.5708, "step": 1198 }, { "epoch": 0.11669099756690998, "grad_norm": 1.3856208861087336, "learning_rate": 9.804508685015876e-06, "loss": 0.3677, "step": 1199 }, { "epoch": 0.11678832116788321, "grad_norm": 1.52110832375871, "learning_rate": 9.804072025308096e-06, "loss": 0.3076, "step": 1200 }, { "epoch": 0.11688564476885645, "grad_norm": 1.3072729020716074, "learning_rate": 9.803634888216724e-06, "loss": 0.2673, "step": 1201 }, { "epoch": 0.11698296836982969, "grad_norm": 1.9045471339964295, "learning_rate": 9.8031972737852e-06, "loss": 0.7326, "step": 1202 }, { "epoch": 0.11708029197080291, "grad_norm": 1.3351659498760804, "learning_rate": 9.802759182057013e-06, "loss": 0.4193, "step": 1203 }, { "epoch": 0.11717761557177615, "grad_norm": 1.4664570380446003, "learning_rate": 9.80232061307569e-06, "loss": 0.358, "step": 1204 }, { "epoch": 0.11727493917274939, "grad_norm": 1.1764722042212887, "learning_rate": 9.80188156688482e-06, "loss": 0.3093, "step": 1205 }, { "epoch": 0.11737226277372263, "grad_norm": 1.5415184448059258, "learning_rate": 9.801442043528026e-06, "loss": 0.4667, "step": 1206 }, { "epoch": 0.11746958637469586, "grad_norm": 1.4827166479100118, "learning_rate": 9.801002043048984e-06, "loss": 0.4876, "step": 1207 }, { "epoch": 0.1175669099756691, "grad_norm": 1.6786553338713377, "learning_rate": 9.80056156549142e-06, "loss": 0.5076, "step": 1208 }, { "epoch": 0.11766423357664234, "grad_norm": 1.2161366736688597, "learning_rate": 9.8001206108991e-06, "loss": 0.2247, "step": 1209 }, { "epoch": 0.11776155717761558, "grad_norm": 1.4015628266094937, "learning_rate": 9.799679179315846e-06, "loss": 0.4327, "step": 1210 }, { "epoch": 0.1178588807785888, "grad_norm": 1.5420255844625947, "learning_rate": 9.799237270785522e-06, "loss": 0.438, "step": 1211 }, { "epoch": 0.11795620437956204, "grad_norm": 1.5978057716745744, "learning_rate": 9.79879488535204e-06, "loss": 0.4203, "step": 1212 }, { "epoch": 0.11805352798053528, "grad_norm": 1.8973070083198396, "learning_rate": 9.79835202305936e-06, "loss": 0.7404, "step": 1213 }, { "epoch": 0.11815085158150851, "grad_norm": 1.5331088091760856, "learning_rate": 9.797908683951492e-06, "loss": 0.5378, "step": 1214 }, { "epoch": 0.11824817518248175, "grad_norm": 1.9627839775910105, "learning_rate": 9.797464868072489e-06, "loss": 0.6298, "step": 1215 }, { "epoch": 0.11834549878345499, "grad_norm": 1.5059209630421948, "learning_rate": 9.797020575466452e-06, "loss": 0.4233, "step": 1216 }, { "epoch": 0.11844282238442823, "grad_norm": 1.4714593450262028, "learning_rate": 9.796575806177531e-06, "loss": 0.4078, "step": 1217 }, { "epoch": 0.11854014598540145, "grad_norm": 1.812199008547911, "learning_rate": 9.796130560249926e-06, "loss": 0.6636, "step": 1218 }, { "epoch": 0.1186374695863747, "grad_norm": 1.330364448549248, "learning_rate": 9.795684837727878e-06, "loss": 0.2597, "step": 1219 }, { "epoch": 0.11873479318734793, "grad_norm": 1.1642089342014024, "learning_rate": 9.795238638655681e-06, "loss": 0.2669, "step": 1220 }, { "epoch": 0.11883211678832117, "grad_norm": 1.0578785975666756, "learning_rate": 9.794791963077672e-06, "loss": 0.2138, "step": 1221 }, { "epoch": 0.1189294403892944, "grad_norm": 1.2810119779981208, "learning_rate": 9.794344811038239e-06, "loss": 0.3426, "step": 1222 }, { "epoch": 0.11902676399026764, "grad_norm": 1.6109574325023976, "learning_rate": 9.793897182581816e-06, "loss": 0.4931, "step": 1223 }, { "epoch": 0.11912408759124088, "grad_norm": 1.8314564663365431, "learning_rate": 9.793449077752882e-06, "loss": 0.5424, "step": 1224 }, { "epoch": 0.1192214111922141, "grad_norm": 1.3266514401224994, "learning_rate": 9.793000496595968e-06, "loss": 0.3123, "step": 1225 }, { "epoch": 0.11931873479318734, "grad_norm": 1.624792232435884, "learning_rate": 9.792551439155649e-06, "loss": 0.3635, "step": 1226 }, { "epoch": 0.11941605839416058, "grad_norm": 1.306535519875853, "learning_rate": 9.792101905476547e-06, "loss": 0.3252, "step": 1227 }, { "epoch": 0.11951338199513382, "grad_norm": 1.591218471169796, "learning_rate": 9.791651895603333e-06, "loss": 0.5493, "step": 1228 }, { "epoch": 0.11961070559610705, "grad_norm": 1.8218114354346657, "learning_rate": 9.791201409580725e-06, "loss": 0.6988, "step": 1229 }, { "epoch": 0.11970802919708029, "grad_norm": 1.7366783724272585, "learning_rate": 9.790750447453487e-06, "loss": 0.4285, "step": 1230 }, { "epoch": 0.11980535279805353, "grad_norm": 1.9439764988659998, "learning_rate": 9.790299009266434e-06, "loss": 0.2787, "step": 1231 }, { "epoch": 0.11990267639902677, "grad_norm": 1.4894849660267724, "learning_rate": 9.789847095064425e-06, "loss": 0.2531, "step": 1232 }, { "epoch": 0.12, "grad_norm": 1.6270936536604101, "learning_rate": 9.789394704892364e-06, "loss": 0.5309, "step": 1233 }, { "epoch": 0.12009732360097324, "grad_norm": 1.4144832764840753, "learning_rate": 9.788941838795209e-06, "loss": 0.298, "step": 1234 }, { "epoch": 0.12019464720194648, "grad_norm": 1.546926786538444, "learning_rate": 9.788488496817958e-06, "loss": 0.4751, "step": 1235 }, { "epoch": 0.12029197080291971, "grad_norm": 1.5827216255031866, "learning_rate": 9.788034679005664e-06, "loss": 0.4576, "step": 1236 }, { "epoch": 0.12038929440389294, "grad_norm": 1.6103699210596951, "learning_rate": 9.78758038540342e-06, "loss": 0.4637, "step": 1237 }, { "epoch": 0.12048661800486618, "grad_norm": 1.4918367462943103, "learning_rate": 9.78712561605637e-06, "loss": 0.4998, "step": 1238 }, { "epoch": 0.12058394160583942, "grad_norm": 1.5775409788682337, "learning_rate": 9.786670371009706e-06, "loss": 0.4415, "step": 1239 }, { "epoch": 0.12068126520681265, "grad_norm": 1.5427286854632911, "learning_rate": 9.786214650308666e-06, "loss": 0.4606, "step": 1240 }, { "epoch": 0.12077858880778589, "grad_norm": 1.523821034203494, "learning_rate": 9.78575845399853e-06, "loss": 0.3918, "step": 1241 }, { "epoch": 0.12087591240875913, "grad_norm": 1.950297391662121, "learning_rate": 9.785301782124638e-06, "loss": 0.5579, "step": 1242 }, { "epoch": 0.12097323600973237, "grad_norm": 1.5957141815138678, "learning_rate": 9.784844634732367e-06, "loss": 0.3814, "step": 1243 }, { "epoch": 0.12107055961070559, "grad_norm": 1.3924341327971197, "learning_rate": 9.784387011867145e-06, "loss": 0.3576, "step": 1244 }, { "epoch": 0.12116788321167883, "grad_norm": 1.670661057733516, "learning_rate": 9.783928913574442e-06, "loss": 0.5307, "step": 1245 }, { "epoch": 0.12126520681265207, "grad_norm": 1.9162789104592521, "learning_rate": 9.783470339899783e-06, "loss": 0.2309, "step": 1246 }, { "epoch": 0.12136253041362531, "grad_norm": 1.4323883393925967, "learning_rate": 9.783011290888737e-06, "loss": 0.4816, "step": 1247 }, { "epoch": 0.12145985401459854, "grad_norm": 1.133557304990043, "learning_rate": 9.78255176658692e-06, "loss": 0.259, "step": 1248 }, { "epoch": 0.12155717761557178, "grad_norm": 1.6381613262272003, "learning_rate": 9.782091767039992e-06, "loss": 0.535, "step": 1249 }, { "epoch": 0.12165450121654502, "grad_norm": 1.521879132713644, "learning_rate": 9.781631292293668e-06, "loss": 0.5299, "step": 1250 }, { "epoch": 0.12175182481751824, "grad_norm": 1.2965362290198492, "learning_rate": 9.781170342393702e-06, "loss": 0.4161, "step": 1251 }, { "epoch": 0.12184914841849148, "grad_norm": 1.4753461399295356, "learning_rate": 9.780708917385901e-06, "loss": 0.5379, "step": 1252 }, { "epoch": 0.12194647201946472, "grad_norm": 0.9509628974965367, "learning_rate": 9.780247017316115e-06, "loss": 0.2681, "step": 1253 }, { "epoch": 0.12204379562043796, "grad_norm": 1.3308735848114122, "learning_rate": 9.779784642230246e-06, "loss": 0.4247, "step": 1254 }, { "epoch": 0.12214111922141119, "grad_norm": 1.1206835484781008, "learning_rate": 9.779321792174239e-06, "loss": 0.2301, "step": 1255 }, { "epoch": 0.12223844282238443, "grad_norm": 1.2598096263209464, "learning_rate": 9.778858467194087e-06, "loss": 0.3163, "step": 1256 }, { "epoch": 0.12233576642335767, "grad_norm": 1.4871998460052394, "learning_rate": 9.778394667335834e-06, "loss": 0.3433, "step": 1257 }, { "epoch": 0.1224330900243309, "grad_norm": 1.384245738588718, "learning_rate": 9.777930392645565e-06, "loss": 0.2111, "step": 1258 }, { "epoch": 0.12253041362530413, "grad_norm": 1.4369061113982475, "learning_rate": 9.777465643169417e-06, "loss": 0.3895, "step": 1259 }, { "epoch": 0.12262773722627737, "grad_norm": 1.8558638944994366, "learning_rate": 9.777000418953568e-06, "loss": 0.3388, "step": 1260 }, { "epoch": 0.12272506082725061, "grad_norm": 1.512984108492842, "learning_rate": 9.776534720044255e-06, "loss": 0.4726, "step": 1261 }, { "epoch": 0.12282238442822384, "grad_norm": 1.367540412040702, "learning_rate": 9.77606854648775e-06, "loss": 0.2684, "step": 1262 }, { "epoch": 0.12291970802919708, "grad_norm": 1.2042550068870583, "learning_rate": 9.775601898330377e-06, "loss": 0.2173, "step": 1263 }, { "epoch": 0.12301703163017032, "grad_norm": 1.5842484372844456, "learning_rate": 9.775134775618509e-06, "loss": 0.5608, "step": 1264 }, { "epoch": 0.12311435523114356, "grad_norm": 1.397447971201202, "learning_rate": 9.774667178398562e-06, "loss": 0.4632, "step": 1265 }, { "epoch": 0.12321167883211678, "grad_norm": 1.3468996882112099, "learning_rate": 9.774199106717004e-06, "loss": 0.3697, "step": 1266 }, { "epoch": 0.12330900243309002, "grad_norm": 1.252677053550249, "learning_rate": 9.773730560620345e-06, "loss": 0.2377, "step": 1267 }, { "epoch": 0.12340632603406326, "grad_norm": 1.4179546260918483, "learning_rate": 9.773261540155148e-06, "loss": 0.4857, "step": 1268 }, { "epoch": 0.1235036496350365, "grad_norm": 1.3092572252570605, "learning_rate": 9.772792045368015e-06, "loss": 0.2969, "step": 1269 }, { "epoch": 0.12360097323600973, "grad_norm": 1.7901486760202572, "learning_rate": 9.772322076305607e-06, "loss": 0.6935, "step": 1270 }, { "epoch": 0.12369829683698297, "grad_norm": 1.5982523135009328, "learning_rate": 9.771851633014618e-06, "loss": 0.4368, "step": 1271 }, { "epoch": 0.12379562043795621, "grad_norm": 1.195950207110724, "learning_rate": 9.7713807155418e-06, "loss": 0.3202, "step": 1272 }, { "epoch": 0.12389294403892943, "grad_norm": 1.352519407714817, "learning_rate": 9.770909323933947e-06, "loss": 0.4284, "step": 1273 }, { "epoch": 0.12399026763990267, "grad_norm": 1.4231425912579843, "learning_rate": 9.770437458237903e-06, "loss": 0.434, "step": 1274 }, { "epoch": 0.12408759124087591, "grad_norm": 1.2825234760121222, "learning_rate": 9.769965118500555e-06, "loss": 0.3817, "step": 1275 }, { "epoch": 0.12418491484184915, "grad_norm": 1.8250797045299043, "learning_rate": 9.769492304768843e-06, "loss": 0.7366, "step": 1276 }, { "epoch": 0.12428223844282238, "grad_norm": 1.3974167065714918, "learning_rate": 9.769019017089748e-06, "loss": 0.2804, "step": 1277 }, { "epoch": 0.12437956204379562, "grad_norm": 1.2933017267383033, "learning_rate": 9.768545255510302e-06, "loss": 0.3495, "step": 1278 }, { "epoch": 0.12447688564476886, "grad_norm": 1.2423501538003798, "learning_rate": 9.768071020077584e-06, "loss": 0.2908, "step": 1279 }, { "epoch": 0.1245742092457421, "grad_norm": 1.8228975858143868, "learning_rate": 9.767596310838718e-06, "loss": 0.4222, "step": 1280 }, { "epoch": 0.12467153284671532, "grad_norm": 1.5510872411682606, "learning_rate": 9.767121127840874e-06, "loss": 0.5058, "step": 1281 }, { "epoch": 0.12476885644768856, "grad_norm": 1.6665778692750302, "learning_rate": 9.766645471131278e-06, "loss": 0.3592, "step": 1282 }, { "epoch": 0.1248661800486618, "grad_norm": 1.5396481092124317, "learning_rate": 9.766169340757187e-06, "loss": 0.2737, "step": 1283 }, { "epoch": 0.12496350364963503, "grad_norm": 1.5555229817491858, "learning_rate": 9.765692736765922e-06, "loss": 0.5466, "step": 1284 }, { "epoch": 0.12506082725060827, "grad_norm": 1.5351601326386175, "learning_rate": 9.765215659204838e-06, "loss": 0.4733, "step": 1285 }, { "epoch": 0.1251581508515815, "grad_norm": 1.2793773363741519, "learning_rate": 9.764738108121347e-06, "loss": 0.3056, "step": 1286 }, { "epoch": 0.12525547445255475, "grad_norm": 1.6331577939793205, "learning_rate": 9.764260083562902e-06, "loss": 0.5883, "step": 1287 }, { "epoch": 0.12535279805352798, "grad_norm": 1.3363728845544067, "learning_rate": 9.763781585577003e-06, "loss": 0.2904, "step": 1288 }, { "epoch": 0.12545012165450123, "grad_norm": 1.360818732035961, "learning_rate": 9.763302614211199e-06, "loss": 0.4202, "step": 1289 }, { "epoch": 0.12554744525547445, "grad_norm": 1.3103877737057137, "learning_rate": 9.762823169513089e-06, "loss": 0.4694, "step": 1290 }, { "epoch": 0.12564476885644768, "grad_norm": 1.1848446118808063, "learning_rate": 9.76234325153031e-06, "loss": 0.2265, "step": 1291 }, { "epoch": 0.12574209245742093, "grad_norm": 1.3494947194310234, "learning_rate": 9.761862860310558e-06, "loss": 0.2382, "step": 1292 }, { "epoch": 0.12583941605839416, "grad_norm": 1.7062717031139596, "learning_rate": 9.761381995901564e-06, "loss": 0.7254, "step": 1293 }, { "epoch": 0.12593673965936739, "grad_norm": 1.208337515242783, "learning_rate": 9.760900658351117e-06, "loss": 0.326, "step": 1294 }, { "epoch": 0.12603406326034064, "grad_norm": 1.3159841432369768, "learning_rate": 9.760418847707043e-06, "loss": 0.3438, "step": 1295 }, { "epoch": 0.12613138686131387, "grad_norm": 1.3809255020300633, "learning_rate": 9.759936564017223e-06, "loss": 0.2716, "step": 1296 }, { "epoch": 0.1262287104622871, "grad_norm": 1.3382917039666673, "learning_rate": 9.759453807329582e-06, "loss": 0.2882, "step": 1297 }, { "epoch": 0.12632603406326035, "grad_norm": 1.3572918507167704, "learning_rate": 9.75897057769209e-06, "loss": 0.4181, "step": 1298 }, { "epoch": 0.12642335766423357, "grad_norm": 1.4433440128897468, "learning_rate": 9.758486875152766e-06, "loss": 0.4883, "step": 1299 }, { "epoch": 0.12652068126520682, "grad_norm": 1.1934091211117765, "learning_rate": 9.758002699759677e-06, "loss": 0.3828, "step": 1300 }, { "epoch": 0.12661800486618005, "grad_norm": 1.4647925609545562, "learning_rate": 9.757518051560935e-06, "loss": 0.402, "step": 1301 }, { "epoch": 0.12671532846715328, "grad_norm": 1.658517832372951, "learning_rate": 9.7570329306047e-06, "loss": 0.6752, "step": 1302 }, { "epoch": 0.12681265206812653, "grad_norm": 1.2682494280043264, "learning_rate": 9.75654733693918e-06, "loss": 0.2786, "step": 1303 }, { "epoch": 0.12690997566909976, "grad_norm": 1.3919267883395627, "learning_rate": 9.756061270612625e-06, "loss": 0.4806, "step": 1304 }, { "epoch": 0.12700729927007298, "grad_norm": 1.160118847382142, "learning_rate": 9.75557473167334e-06, "loss": 0.2458, "step": 1305 }, { "epoch": 0.12710462287104624, "grad_norm": 1.482640427472728, "learning_rate": 9.755087720169672e-06, "loss": 0.527, "step": 1306 }, { "epoch": 0.12720194647201946, "grad_norm": 1.5068875178509769, "learning_rate": 9.75460023615001e-06, "loss": 0.4985, "step": 1307 }, { "epoch": 0.1272992700729927, "grad_norm": 1.2878541774064265, "learning_rate": 9.754112279662805e-06, "loss": 0.3478, "step": 1308 }, { "epoch": 0.12739659367396594, "grad_norm": 1.1398490461157162, "learning_rate": 9.75362385075654e-06, "loss": 0.3084, "step": 1309 }, { "epoch": 0.12749391727493917, "grad_norm": 1.2924420070365765, "learning_rate": 9.75313494947975e-06, "loss": 0.3919, "step": 1310 }, { "epoch": 0.12759124087591242, "grad_norm": 1.4558696462945964, "learning_rate": 9.752645575881018e-06, "loss": 0.225, "step": 1311 }, { "epoch": 0.12768856447688565, "grad_norm": 1.677251779693783, "learning_rate": 9.752155730008974e-06, "loss": 0.4831, "step": 1312 }, { "epoch": 0.12778588807785887, "grad_norm": 1.3350720195417478, "learning_rate": 9.751665411912294e-06, "loss": 0.4371, "step": 1313 }, { "epoch": 0.12788321167883213, "grad_norm": 1.3653039655289896, "learning_rate": 9.751174621639702e-06, "loss": 0.4051, "step": 1314 }, { "epoch": 0.12798053527980535, "grad_norm": 2.0214110135389927, "learning_rate": 9.75068335923997e-06, "loss": 0.4971, "step": 1315 }, { "epoch": 0.12807785888077858, "grad_norm": 1.7144522600221743, "learning_rate": 9.750191624761909e-06, "loss": 0.6353, "step": 1316 }, { "epoch": 0.12817518248175183, "grad_norm": 1.61491787633751, "learning_rate": 9.749699418254388e-06, "loss": 0.5408, "step": 1317 }, { "epoch": 0.12827250608272506, "grad_norm": 1.3029361322695596, "learning_rate": 9.749206739766317e-06, "loss": 0.407, "step": 1318 }, { "epoch": 0.12836982968369828, "grad_norm": 1.2453189940624274, "learning_rate": 9.748713589346652e-06, "loss": 0.3254, "step": 1319 }, { "epoch": 0.12846715328467154, "grad_norm": 1.4117795102544664, "learning_rate": 9.748219967044398e-06, "loss": 0.3941, "step": 1320 }, { "epoch": 0.12856447688564476, "grad_norm": 1.4197813276706028, "learning_rate": 9.74772587290861e-06, "loss": 0.3454, "step": 1321 }, { "epoch": 0.12866180048661802, "grad_norm": 1.3133599325252279, "learning_rate": 9.747231306988381e-06, "loss": 0.3389, "step": 1322 }, { "epoch": 0.12875912408759124, "grad_norm": 1.3432229805022793, "learning_rate": 9.746736269332861e-06, "loss": 0.469, "step": 1323 }, { "epoch": 0.12885644768856447, "grad_norm": 1.1244292400820686, "learning_rate": 9.746240759991241e-06, "loss": 0.3674, "step": 1324 }, { "epoch": 0.12895377128953772, "grad_norm": 1.4966792860681473, "learning_rate": 9.745744779012758e-06, "loss": 0.4308, "step": 1325 }, { "epoch": 0.12905109489051095, "grad_norm": 1.5238028846181695, "learning_rate": 9.745248326446699e-06, "loss": 0.4213, "step": 1326 }, { "epoch": 0.12914841849148417, "grad_norm": 1.3633303920337936, "learning_rate": 9.744751402342398e-06, "loss": 0.438, "step": 1327 }, { "epoch": 0.12924574209245743, "grad_norm": 1.3260493495785517, "learning_rate": 9.744254006749235e-06, "loss": 0.4762, "step": 1328 }, { "epoch": 0.12934306569343065, "grad_norm": 1.705738477220435, "learning_rate": 9.743756139716634e-06, "loss": 0.5861, "step": 1329 }, { "epoch": 0.12944038929440388, "grad_norm": 1.5829201544013396, "learning_rate": 9.743257801294069e-06, "loss": 0.469, "step": 1330 }, { "epoch": 0.12953771289537713, "grad_norm": 1.1445128143179795, "learning_rate": 9.74275899153106e-06, "loss": 0.4018, "step": 1331 }, { "epoch": 0.12963503649635036, "grad_norm": 1.2900129109113572, "learning_rate": 9.742259710477178e-06, "loss": 0.3802, "step": 1332 }, { "epoch": 0.1297323600973236, "grad_norm": 1.3212461161488713, "learning_rate": 9.74175995818203e-06, "loss": 0.3725, "step": 1333 }, { "epoch": 0.12982968369829684, "grad_norm": 1.3979706650986563, "learning_rate": 9.741259734695283e-06, "loss": 0.3961, "step": 1334 }, { "epoch": 0.12992700729927006, "grad_norm": 1.2642819849441118, "learning_rate": 9.740759040066642e-06, "loss": 0.3528, "step": 1335 }, { "epoch": 0.13002433090024332, "grad_norm": 1.7776493019463793, "learning_rate": 9.74025787434586e-06, "loss": 0.6424, "step": 1336 }, { "epoch": 0.13012165450121654, "grad_norm": 1.1885806737857232, "learning_rate": 9.73975623758274e-06, "loss": 0.3163, "step": 1337 }, { "epoch": 0.13021897810218977, "grad_norm": 1.7443954093720497, "learning_rate": 9.739254129827131e-06, "loss": 0.7263, "step": 1338 }, { "epoch": 0.13031630170316302, "grad_norm": 1.7005058938305366, "learning_rate": 9.738751551128924e-06, "loss": 0.5204, "step": 1339 }, { "epoch": 0.13041362530413625, "grad_norm": 6.598521165184121, "learning_rate": 9.738248501538063e-06, "loss": 0.5113, "step": 1340 }, { "epoch": 0.1305109489051095, "grad_norm": 1.6203066466178853, "learning_rate": 9.737744981104536e-06, "loss": 0.625, "step": 1341 }, { "epoch": 0.13060827250608273, "grad_norm": 1.548111392574701, "learning_rate": 9.73724098987838e-06, "loss": 0.3952, "step": 1342 }, { "epoch": 0.13070559610705595, "grad_norm": 1.4871418112966692, "learning_rate": 9.736736527909674e-06, "loss": 0.5084, "step": 1343 }, { "epoch": 0.1308029197080292, "grad_norm": 1.0723677900938815, "learning_rate": 9.736231595248546e-06, "loss": 0.255, "step": 1344 }, { "epoch": 0.13090024330900243, "grad_norm": 1.5695490713137843, "learning_rate": 9.735726191945176e-06, "loss": 0.3438, "step": 1345 }, { "epoch": 0.13099756690997566, "grad_norm": 2.059617079542521, "learning_rate": 9.73522031804978e-06, "loss": 0.5249, "step": 1346 }, { "epoch": 0.1310948905109489, "grad_norm": 1.5301765260275246, "learning_rate": 9.734713973612633e-06, "loss": 0.3667, "step": 1347 }, { "epoch": 0.13119221411192214, "grad_norm": 1.7431028553023509, "learning_rate": 9.734207158684048e-06, "loss": 0.5551, "step": 1348 }, { "epoch": 0.13128953771289537, "grad_norm": 1.2916959738739295, "learning_rate": 9.733699873314388e-06, "loss": 0.278, "step": 1349 }, { "epoch": 0.13138686131386862, "grad_norm": 1.5891072584842363, "learning_rate": 9.733192117554062e-06, "loss": 0.4139, "step": 1350 }, { "epoch": 0.13148418491484185, "grad_norm": 1.6366778166029219, "learning_rate": 9.732683891453528e-06, "loss": 0.4888, "step": 1351 }, { "epoch": 0.1315815085158151, "grad_norm": 1.6763551525158185, "learning_rate": 9.732175195063283e-06, "loss": 0.5432, "step": 1352 }, { "epoch": 0.13167883211678832, "grad_norm": 1.551593865483807, "learning_rate": 9.731666028433882e-06, "loss": 0.5634, "step": 1353 }, { "epoch": 0.13177615571776155, "grad_norm": 1.693219206573502, "learning_rate": 9.731156391615919e-06, "loss": 0.4554, "step": 1354 }, { "epoch": 0.1318734793187348, "grad_norm": 1.4894832853139421, "learning_rate": 9.730646284660037e-06, "loss": 0.4286, "step": 1355 }, { "epoch": 0.13197080291970803, "grad_norm": 1.20058966692396, "learning_rate": 9.730135707616927e-06, "loss": 0.2519, "step": 1356 }, { "epoch": 0.13206812652068126, "grad_norm": 1.395115321325138, "learning_rate": 9.729624660537324e-06, "loss": 0.3718, "step": 1357 }, { "epoch": 0.1321654501216545, "grad_norm": 1.3441869335850034, "learning_rate": 9.729113143472011e-06, "loss": 0.43, "step": 1358 }, { "epoch": 0.13226277372262774, "grad_norm": 1.31865416445236, "learning_rate": 9.72860115647182e-06, "loss": 0.296, "step": 1359 }, { "epoch": 0.13236009732360096, "grad_norm": 1.3998148863889133, "learning_rate": 9.728088699587623e-06, "loss": 0.2642, "step": 1360 }, { "epoch": 0.13245742092457422, "grad_norm": 1.5917388343760925, "learning_rate": 9.727575772870347e-06, "loss": 0.5999, "step": 1361 }, { "epoch": 0.13255474452554744, "grad_norm": 1.6062441992747731, "learning_rate": 9.727062376370962e-06, "loss": 0.6017, "step": 1362 }, { "epoch": 0.1326520681265207, "grad_norm": 1.756325054261889, "learning_rate": 9.72654851014048e-06, "loss": 0.5855, "step": 1363 }, { "epoch": 0.13274939172749392, "grad_norm": 1.5782112626775713, "learning_rate": 9.72603417422997e-06, "loss": 0.5643, "step": 1364 }, { "epoch": 0.13284671532846715, "grad_norm": 1.6280008631148617, "learning_rate": 9.725519368690539e-06, "loss": 0.3918, "step": 1365 }, { "epoch": 0.1329440389294404, "grad_norm": 1.731476294535625, "learning_rate": 9.725004093573343e-06, "loss": 0.6909, "step": 1366 }, { "epoch": 0.13304136253041363, "grad_norm": 1.7012591859680217, "learning_rate": 9.724488348929587e-06, "loss": 0.3206, "step": 1367 }, { "epoch": 0.13313868613138685, "grad_norm": 1.5539166250213363, "learning_rate": 9.723972134810519e-06, "loss": 0.3735, "step": 1368 }, { "epoch": 0.1332360097323601, "grad_norm": 1.2431527472113675, "learning_rate": 9.723455451267436e-06, "loss": 0.4023, "step": 1369 }, { "epoch": 0.13333333333333333, "grad_norm": 1.4147928785913308, "learning_rate": 9.722938298351682e-06, "loss": 0.4501, "step": 1370 }, { "epoch": 0.13343065693430656, "grad_norm": 1.5209109752466956, "learning_rate": 9.722420676114646e-06, "loss": 0.4504, "step": 1371 }, { "epoch": 0.1335279805352798, "grad_norm": 1.6031977794999224, "learning_rate": 9.721902584607766e-06, "loss": 0.4036, "step": 1372 }, { "epoch": 0.13362530413625304, "grad_norm": 1.3752266957066934, "learning_rate": 9.721384023882524e-06, "loss": 0.4008, "step": 1373 }, { "epoch": 0.1337226277372263, "grad_norm": 1.2289957585024915, "learning_rate": 9.720864993990448e-06, "loss": 0.3214, "step": 1374 }, { "epoch": 0.13381995133819952, "grad_norm": 1.5334770200964671, "learning_rate": 9.720345494983117e-06, "loss": 0.4103, "step": 1375 }, { "epoch": 0.13391727493917274, "grad_norm": 1.4428318489533865, "learning_rate": 9.719825526912152e-06, "loss": 0.4314, "step": 1376 }, { "epoch": 0.134014598540146, "grad_norm": 1.6794168653476527, "learning_rate": 9.719305089829224e-06, "loss": 0.6027, "step": 1377 }, { "epoch": 0.13411192214111922, "grad_norm": 1.4695816931820398, "learning_rate": 9.718784183786048e-06, "loss": 0.5337, "step": 1378 }, { "epoch": 0.13420924574209245, "grad_norm": 1.428180254445363, "learning_rate": 9.718262808834386e-06, "loss": 0.3636, "step": 1379 }, { "epoch": 0.1343065693430657, "grad_norm": 1.4446624118640763, "learning_rate": 9.717740965026051e-06, "loss": 0.4213, "step": 1380 }, { "epoch": 0.13440389294403893, "grad_norm": 1.0145899854020284, "learning_rate": 9.717218652412896e-06, "loss": 0.292, "step": 1381 }, { "epoch": 0.13450121654501215, "grad_norm": 1.4589445831305994, "learning_rate": 9.716695871046824e-06, "loss": 0.4787, "step": 1382 }, { "epoch": 0.1345985401459854, "grad_norm": 1.5462880417778382, "learning_rate": 9.716172620979783e-06, "loss": 0.4716, "step": 1383 }, { "epoch": 0.13469586374695863, "grad_norm": 1.5411526965931406, "learning_rate": 9.71564890226377e-06, "loss": 0.5311, "step": 1384 }, { "epoch": 0.1347931873479319, "grad_norm": 1.1928924795974905, "learning_rate": 9.71512471495083e-06, "loss": 0.2724, "step": 1385 }, { "epoch": 0.1348905109489051, "grad_norm": 1.3201585939530793, "learning_rate": 9.714600059093045e-06, "loss": 0.2987, "step": 1386 }, { "epoch": 0.13498783454987834, "grad_norm": 1.5687746327202647, "learning_rate": 9.714074934742556e-06, "loss": 0.363, "step": 1387 }, { "epoch": 0.1350851581508516, "grad_norm": 1.5338932500845779, "learning_rate": 9.713549341951543e-06, "loss": 0.5661, "step": 1388 }, { "epoch": 0.13518248175182482, "grad_norm": 1.1601153536694444, "learning_rate": 9.713023280772236e-06, "loss": 0.3079, "step": 1389 }, { "epoch": 0.13527980535279804, "grad_norm": 1.3983637614495477, "learning_rate": 9.712496751256907e-06, "loss": 0.4741, "step": 1390 }, { "epoch": 0.1353771289537713, "grad_norm": 1.2378967843544995, "learning_rate": 9.71196975345788e-06, "loss": 0.3467, "step": 1391 }, { "epoch": 0.13547445255474452, "grad_norm": 1.3128641622430697, "learning_rate": 9.711442287427523e-06, "loss": 0.413, "step": 1392 }, { "epoch": 0.13557177615571775, "grad_norm": 1.6638151172989781, "learning_rate": 9.71091435321825e-06, "loss": 0.4844, "step": 1393 }, { "epoch": 0.135669099756691, "grad_norm": 1.5023430961651105, "learning_rate": 9.710385950882522e-06, "loss": 0.3639, "step": 1394 }, { "epoch": 0.13576642335766423, "grad_norm": 1.3286069107884302, "learning_rate": 9.709857080472847e-06, "loss": 0.4055, "step": 1395 }, { "epoch": 0.13586374695863748, "grad_norm": 1.2934746343236392, "learning_rate": 9.709327742041776e-06, "loss": 0.2837, "step": 1396 }, { "epoch": 0.1359610705596107, "grad_norm": 1.698077360010743, "learning_rate": 9.708797935641915e-06, "loss": 0.3687, "step": 1397 }, { "epoch": 0.13605839416058393, "grad_norm": 1.412271661785088, "learning_rate": 9.70826766132591e-06, "loss": 0.3577, "step": 1398 }, { "epoch": 0.1361557177615572, "grad_norm": 1.5421055950766074, "learning_rate": 9.707736919146453e-06, "loss": 0.5394, "step": 1399 }, { "epoch": 0.1362530413625304, "grad_norm": 2.554386599490806, "learning_rate": 9.707205709156285e-06, "loss": 0.212, "step": 1400 }, { "epoch": 0.13635036496350364, "grad_norm": 1.7436805109650844, "learning_rate": 9.70667403140819e-06, "loss": 0.6893, "step": 1401 }, { "epoch": 0.1364476885644769, "grad_norm": 1.613527884115612, "learning_rate": 9.706141885955006e-06, "loss": 0.42, "step": 1402 }, { "epoch": 0.13654501216545012, "grad_norm": 1.711619341430359, "learning_rate": 9.70560927284961e-06, "loss": 0.7025, "step": 1403 }, { "epoch": 0.13664233576642335, "grad_norm": 1.5376532439489434, "learning_rate": 9.705076192144927e-06, "loss": 0.5201, "step": 1404 }, { "epoch": 0.1367396593673966, "grad_norm": 1.492510855426001, "learning_rate": 9.704542643893931e-06, "loss": 0.4281, "step": 1405 }, { "epoch": 0.13683698296836982, "grad_norm": 1.5678573317920237, "learning_rate": 9.704008628149641e-06, "loss": 0.506, "step": 1406 }, { "epoch": 0.13693430656934308, "grad_norm": 1.3237691920747017, "learning_rate": 9.703474144965123e-06, "loss": 0.4114, "step": 1407 }, { "epoch": 0.1370316301703163, "grad_norm": 1.4134135574988251, "learning_rate": 9.702939194393489e-06, "loss": 0.3806, "step": 1408 }, { "epoch": 0.13712895377128953, "grad_norm": 1.5544258549266206, "learning_rate": 9.702403776487895e-06, "loss": 0.4863, "step": 1409 }, { "epoch": 0.13722627737226278, "grad_norm": 1.3619063912879554, "learning_rate": 9.701867891301548e-06, "loss": 0.3692, "step": 1410 }, { "epoch": 0.137323600973236, "grad_norm": 1.5146665393724075, "learning_rate": 9.701331538887699e-06, "loss": 0.3311, "step": 1411 }, { "epoch": 0.13742092457420924, "grad_norm": 1.5674647990142176, "learning_rate": 9.700794719299644e-06, "loss": 0.5292, "step": 1412 }, { "epoch": 0.1375182481751825, "grad_norm": 1.4711236643775818, "learning_rate": 9.700257432590729e-06, "loss": 0.466, "step": 1413 }, { "epoch": 0.13761557177615572, "grad_norm": 1.4410106250389758, "learning_rate": 9.699719678814345e-06, "loss": 0.3276, "step": 1414 }, { "epoch": 0.13771289537712894, "grad_norm": 1.652937978394441, "learning_rate": 9.699181458023927e-06, "loss": 0.5391, "step": 1415 }, { "epoch": 0.1378102189781022, "grad_norm": 1.7285587973510355, "learning_rate": 9.698642770272959e-06, "loss": 0.5707, "step": 1416 }, { "epoch": 0.13790754257907542, "grad_norm": 1.325058257423692, "learning_rate": 9.698103615614972e-06, "loss": 0.3429, "step": 1417 }, { "epoch": 0.13800486618004867, "grad_norm": 1.5653351048996198, "learning_rate": 9.69756399410354e-06, "loss": 0.4132, "step": 1418 }, { "epoch": 0.1381021897810219, "grad_norm": 1.603805088396393, "learning_rate": 9.697023905792287e-06, "loss": 0.4983, "step": 1419 }, { "epoch": 0.13819951338199513, "grad_norm": 1.5052443063346659, "learning_rate": 9.69648335073488e-06, "loss": 0.2713, "step": 1420 }, { "epoch": 0.13829683698296838, "grad_norm": 1.30196768692164, "learning_rate": 9.695942328985037e-06, "loss": 0.27, "step": 1421 }, { "epoch": 0.1383941605839416, "grad_norm": 1.1542739478608208, "learning_rate": 9.695400840596519e-06, "loss": 0.3309, "step": 1422 }, { "epoch": 0.13849148418491483, "grad_norm": 1.1029138054910885, "learning_rate": 9.694858885623132e-06, "loss": 0.3262, "step": 1423 }, { "epoch": 0.13858880778588809, "grad_norm": 1.581389120261872, "learning_rate": 9.694316464118732e-06, "loss": 0.4663, "step": 1424 }, { "epoch": 0.1386861313868613, "grad_norm": 1.2966198038166061, "learning_rate": 9.69377357613722e-06, "loss": 0.336, "step": 1425 }, { "epoch": 0.13878345498783454, "grad_norm": 1.505634533514273, "learning_rate": 9.693230221732544e-06, "loss": 0.4269, "step": 1426 }, { "epoch": 0.1388807785888078, "grad_norm": 1.274453115047599, "learning_rate": 9.692686400958695e-06, "loss": 0.3978, "step": 1427 }, { "epoch": 0.13897810218978102, "grad_norm": 1.2126154933077449, "learning_rate": 9.692142113869714e-06, "loss": 0.2754, "step": 1428 }, { "epoch": 0.13907542579075427, "grad_norm": 1.4884313472642259, "learning_rate": 9.691597360519686e-06, "loss": 0.4661, "step": 1429 }, { "epoch": 0.1391727493917275, "grad_norm": 1.5680101511782372, "learning_rate": 9.691052140962747e-06, "loss": 0.4237, "step": 1430 }, { "epoch": 0.13927007299270072, "grad_norm": 1.325640699841282, "learning_rate": 9.690506455253073e-06, "loss": 0.3988, "step": 1431 }, { "epoch": 0.13936739659367398, "grad_norm": 1.3107002270910884, "learning_rate": 9.689960303444887e-06, "loss": 0.4268, "step": 1432 }, { "epoch": 0.1394647201946472, "grad_norm": 1.9246823036308274, "learning_rate": 9.689413685592465e-06, "loss": 0.3733, "step": 1433 }, { "epoch": 0.13956204379562043, "grad_norm": 1.3731854343094059, "learning_rate": 9.688866601750122e-06, "loss": 0.4215, "step": 1434 }, { "epoch": 0.13965936739659368, "grad_norm": 1.368964734934982, "learning_rate": 9.688319051972224e-06, "loss": 0.4697, "step": 1435 }, { "epoch": 0.1397566909975669, "grad_norm": 1.3451140821212522, "learning_rate": 9.687771036313178e-06, "loss": 0.3741, "step": 1436 }, { "epoch": 0.13985401459854013, "grad_norm": 1.5372748667563303, "learning_rate": 9.687222554827444e-06, "loss": 0.4199, "step": 1437 }, { "epoch": 0.1399513381995134, "grad_norm": 1.1780522614950486, "learning_rate": 9.686673607569526e-06, "loss": 0.3602, "step": 1438 }, { "epoch": 0.1400486618004866, "grad_norm": 1.20778383169165, "learning_rate": 9.686124194593967e-06, "loss": 0.23, "step": 1439 }, { "epoch": 0.14014598540145987, "grad_norm": 1.6760972087501165, "learning_rate": 9.685574315955368e-06, "loss": 0.5089, "step": 1440 }, { "epoch": 0.1402433090024331, "grad_norm": 1.7963497555189056, "learning_rate": 9.68502397170837e-06, "loss": 0.3932, "step": 1441 }, { "epoch": 0.14034063260340632, "grad_norm": 1.401968265514402, "learning_rate": 9.68447316190766e-06, "loss": 0.4272, "step": 1442 }, { "epoch": 0.14043795620437957, "grad_norm": 1.1461895591250986, "learning_rate": 9.683921886607973e-06, "loss": 0.3003, "step": 1443 }, { "epoch": 0.1405352798053528, "grad_norm": 1.8257595963636586, "learning_rate": 9.683370145864089e-06, "loss": 0.4454, "step": 1444 }, { "epoch": 0.14063260340632602, "grad_norm": 1.3483599166387192, "learning_rate": 9.682817939730833e-06, "loss": 0.3708, "step": 1445 }, { "epoch": 0.14072992700729928, "grad_norm": 1.4560700792487955, "learning_rate": 9.682265268263083e-06, "loss": 0.4321, "step": 1446 }, { "epoch": 0.1408272506082725, "grad_norm": 1.4364952224667933, "learning_rate": 9.681712131515753e-06, "loss": 0.3812, "step": 1447 }, { "epoch": 0.14092457420924573, "grad_norm": 1.6808986821455574, "learning_rate": 9.681158529543812e-06, "loss": 0.3939, "step": 1448 }, { "epoch": 0.14102189781021898, "grad_norm": 1.5327313322922438, "learning_rate": 9.68060446240227e-06, "loss": 0.3617, "step": 1449 }, { "epoch": 0.1411192214111922, "grad_norm": 1.9055650449775412, "learning_rate": 9.680049930146186e-06, "loss": 0.4984, "step": 1450 }, { "epoch": 0.14121654501216546, "grad_norm": 1.8971706606162055, "learning_rate": 9.679494932830664e-06, "loss": 0.4196, "step": 1451 }, { "epoch": 0.1413138686131387, "grad_norm": 1.7337796675846617, "learning_rate": 9.678939470510856e-06, "loss": 0.4282, "step": 1452 }, { "epoch": 0.1414111922141119, "grad_norm": 1.6436762455975924, "learning_rate": 9.678383543241954e-06, "loss": 0.425, "step": 1453 }, { "epoch": 0.14150851581508517, "grad_norm": 1.3304471527694197, "learning_rate": 9.677827151079205e-06, "loss": 0.346, "step": 1454 }, { "epoch": 0.1416058394160584, "grad_norm": 1.3162532004293022, "learning_rate": 9.677270294077895e-06, "loss": 0.4492, "step": 1455 }, { "epoch": 0.14170316301703162, "grad_norm": 1.2299075830057253, "learning_rate": 9.676712972293363e-06, "loss": 0.3525, "step": 1456 }, { "epoch": 0.14180048661800487, "grad_norm": 1.7174455721253266, "learning_rate": 9.676155185780989e-06, "loss": 0.763, "step": 1457 }, { "epoch": 0.1418978102189781, "grad_norm": 0.9624475539149472, "learning_rate": 9.675596934596198e-06, "loss": 0.2234, "step": 1458 }, { "epoch": 0.14199513381995132, "grad_norm": 1.380722751360302, "learning_rate": 9.675038218794469e-06, "loss": 0.3539, "step": 1459 }, { "epoch": 0.14209245742092458, "grad_norm": 1.3595616004290971, "learning_rate": 9.674479038431314e-06, "loss": 0.4356, "step": 1460 }, { "epoch": 0.1421897810218978, "grad_norm": 1.2777542247997187, "learning_rate": 9.673919393562308e-06, "loss": 0.3233, "step": 1461 }, { "epoch": 0.14228710462287106, "grad_norm": 1.23752096524445, "learning_rate": 9.673359284243055e-06, "loss": 0.405, "step": 1462 }, { "epoch": 0.14238442822384428, "grad_norm": 1.4547729172095425, "learning_rate": 9.672798710529222e-06, "loss": 0.5356, "step": 1463 }, { "epoch": 0.1424817518248175, "grad_norm": 1.5976011084855026, "learning_rate": 9.672237672476506e-06, "loss": 0.571, "step": 1464 }, { "epoch": 0.14257907542579076, "grad_norm": 1.4454139467669962, "learning_rate": 9.67167617014066e-06, "loss": 0.4556, "step": 1465 }, { "epoch": 0.142676399026764, "grad_norm": 1.5296734849172828, "learning_rate": 9.671114203577485e-06, "loss": 0.5791, "step": 1466 }, { "epoch": 0.14277372262773722, "grad_norm": 1.0140913901893902, "learning_rate": 9.670551772842818e-06, "loss": 0.2732, "step": 1467 }, { "epoch": 0.14287104622871047, "grad_norm": 1.5600773149062541, "learning_rate": 9.669988877992551e-06, "loss": 0.3902, "step": 1468 }, { "epoch": 0.1429683698296837, "grad_norm": 1.4767158872669255, "learning_rate": 9.66942551908262e-06, "loss": 0.5531, "step": 1469 }, { "epoch": 0.14306569343065692, "grad_norm": 1.1134570066684917, "learning_rate": 9.668861696169003e-06, "loss": 0.278, "step": 1470 }, { "epoch": 0.14316301703163017, "grad_norm": 0.9776488708344422, "learning_rate": 9.66829740930773e-06, "loss": 0.231, "step": 1471 }, { "epoch": 0.1432603406326034, "grad_norm": 1.4647496714581032, "learning_rate": 9.667732658554875e-06, "loss": 0.485, "step": 1472 }, { "epoch": 0.14335766423357665, "grad_norm": 1.2234301570511203, "learning_rate": 9.667167443966557e-06, "loss": 0.3944, "step": 1473 }, { "epoch": 0.14345498783454988, "grad_norm": 1.3655487702696618, "learning_rate": 9.66660176559894e-06, "loss": 0.3989, "step": 1474 }, { "epoch": 0.1435523114355231, "grad_norm": 1.4690108372007447, "learning_rate": 9.666035623508238e-06, "loss": 0.4311, "step": 1475 }, { "epoch": 0.14364963503649636, "grad_norm": 1.1910374305057687, "learning_rate": 9.665469017750708e-06, "loss": 0.3002, "step": 1476 }, { "epoch": 0.14374695863746959, "grad_norm": 1.678176413091249, "learning_rate": 9.664901948382654e-06, "loss": 0.6143, "step": 1477 }, { "epoch": 0.1438442822384428, "grad_norm": 1.817046546881487, "learning_rate": 9.664334415460426e-06, "loss": 0.7811, "step": 1478 }, { "epoch": 0.14394160583941606, "grad_norm": 1.4955026439922687, "learning_rate": 9.663766419040422e-06, "loss": 0.411, "step": 1479 }, { "epoch": 0.1440389294403893, "grad_norm": 1.4198677231066263, "learning_rate": 9.66319795917908e-06, "loss": 0.4245, "step": 1480 }, { "epoch": 0.14413625304136254, "grad_norm": 1.5199876898969789, "learning_rate": 9.662629035932892e-06, "loss": 0.438, "step": 1481 }, { "epoch": 0.14423357664233577, "grad_norm": 1.4859771113526168, "learning_rate": 9.662059649358388e-06, "loss": 0.3949, "step": 1482 }, { "epoch": 0.144330900243309, "grad_norm": 1.5386966328542977, "learning_rate": 9.661489799512155e-06, "loss": 0.4679, "step": 1483 }, { "epoch": 0.14442822384428225, "grad_norm": 1.2872766782537612, "learning_rate": 9.660919486450813e-06, "loss": 0.2624, "step": 1484 }, { "epoch": 0.14452554744525548, "grad_norm": 1.3276179523832277, "learning_rate": 9.660348710231037e-06, "loss": 0.5476, "step": 1485 }, { "epoch": 0.1446228710462287, "grad_norm": 0.9490583621811937, "learning_rate": 9.659777470909547e-06, "loss": 0.2354, "step": 1486 }, { "epoch": 0.14472019464720196, "grad_norm": 1.3763558898436123, "learning_rate": 9.659205768543104e-06, "loss": 0.4327, "step": 1487 }, { "epoch": 0.14481751824817518, "grad_norm": 1.178366926128956, "learning_rate": 9.658633603188521e-06, "loss": 0.3839, "step": 1488 }, { "epoch": 0.1449148418491484, "grad_norm": 1.3255542333725456, "learning_rate": 9.658060974902653e-06, "loss": 0.3068, "step": 1489 }, { "epoch": 0.14501216545012166, "grad_norm": 1.5998345706772108, "learning_rate": 9.657487883742403e-06, "loss": 0.5432, "step": 1490 }, { "epoch": 0.1451094890510949, "grad_norm": 1.8804975658787435, "learning_rate": 9.656914329764718e-06, "loss": 0.5268, "step": 1491 }, { "epoch": 0.14520681265206814, "grad_norm": 1.5841269093835124, "learning_rate": 9.656340313026595e-06, "loss": 0.6304, "step": 1492 }, { "epoch": 0.14530413625304137, "grad_norm": 1.5832299483159056, "learning_rate": 9.655765833585072e-06, "loss": 0.4417, "step": 1493 }, { "epoch": 0.1454014598540146, "grad_norm": 1.2541361090062475, "learning_rate": 9.655190891497237e-06, "loss": 0.2956, "step": 1494 }, { "epoch": 0.14549878345498785, "grad_norm": 1.4549578520972333, "learning_rate": 9.654615486820223e-06, "loss": 0.5352, "step": 1495 }, { "epoch": 0.14559610705596107, "grad_norm": 1.4797996277102474, "learning_rate": 9.654039619611205e-06, "loss": 0.4915, "step": 1496 }, { "epoch": 0.1456934306569343, "grad_norm": 1.2281886698207842, "learning_rate": 9.65346328992741e-06, "loss": 0.1901, "step": 1497 }, { "epoch": 0.14579075425790755, "grad_norm": 1.4478972545758728, "learning_rate": 9.652886497826109e-06, "loss": 0.4142, "step": 1498 }, { "epoch": 0.14588807785888078, "grad_norm": 1.5883286963945868, "learning_rate": 9.652309243364614e-06, "loss": 0.3576, "step": 1499 }, { "epoch": 0.145985401459854, "grad_norm": 1.5369489845441549, "learning_rate": 9.651731526600293e-06, "loss": 0.5479, "step": 1500 }, { "epoch": 0.14608272506082726, "grad_norm": 1.5655077404950533, "learning_rate": 9.651153347590549e-06, "loss": 0.3464, "step": 1501 }, { "epoch": 0.14618004866180048, "grad_norm": 1.6426065013852054, "learning_rate": 9.65057470639284e-06, "loss": 0.5038, "step": 1502 }, { "epoch": 0.14627737226277374, "grad_norm": 1.8088684532537898, "learning_rate": 9.649995603064664e-06, "loss": 0.5731, "step": 1503 }, { "epoch": 0.14637469586374696, "grad_norm": 1.2493389766016731, "learning_rate": 9.649416037663564e-06, "loss": 0.3306, "step": 1504 }, { "epoch": 0.1464720194647202, "grad_norm": 1.5964615139072293, "learning_rate": 9.648836010247137e-06, "loss": 0.4169, "step": 1505 }, { "epoch": 0.14656934306569344, "grad_norm": 1.3925830899828215, "learning_rate": 9.648255520873018e-06, "loss": 0.3092, "step": 1506 }, { "epoch": 0.14666666666666667, "grad_norm": 1.4256860988690832, "learning_rate": 9.647674569598889e-06, "loss": 0.3201, "step": 1507 }, { "epoch": 0.1467639902676399, "grad_norm": 1.6614703553660697, "learning_rate": 9.647093156482483e-06, "loss": 0.6078, "step": 1508 }, { "epoch": 0.14686131386861315, "grad_norm": 1.4357097092225446, "learning_rate": 9.646511281581575e-06, "loss": 0.4004, "step": 1509 }, { "epoch": 0.14695863746958637, "grad_norm": 1.4562846462074024, "learning_rate": 9.645928944953981e-06, "loss": 0.4601, "step": 1510 }, { "epoch": 0.1470559610705596, "grad_norm": 1.3277232740610976, "learning_rate": 9.645346146657575e-06, "loss": 0.4015, "step": 1511 }, { "epoch": 0.14715328467153285, "grad_norm": 1.5964514332978577, "learning_rate": 9.644762886750267e-06, "loss": 0.4556, "step": 1512 }, { "epoch": 0.14725060827250608, "grad_norm": 1.4663379423625913, "learning_rate": 9.644179165290015e-06, "loss": 0.4353, "step": 1513 }, { "epoch": 0.14734793187347933, "grad_norm": 1.0949765548634744, "learning_rate": 9.643594982334826e-06, "loss": 0.2276, "step": 1514 }, { "epoch": 0.14744525547445256, "grad_norm": 1.563845779693575, "learning_rate": 9.643010337942749e-06, "loss": 0.6694, "step": 1515 }, { "epoch": 0.14754257907542578, "grad_norm": 1.024413538842663, "learning_rate": 9.642425232171881e-06, "loss": 0.3047, "step": 1516 }, { "epoch": 0.14763990267639904, "grad_norm": 1.712866405633365, "learning_rate": 9.641839665080363e-06, "loss": 0.6729, "step": 1517 }, { "epoch": 0.14773722627737226, "grad_norm": 1.4526258041869373, "learning_rate": 9.641253636726386e-06, "loss": 0.5037, "step": 1518 }, { "epoch": 0.1478345498783455, "grad_norm": 1.7375410582816389, "learning_rate": 9.640667147168182e-06, "loss": 0.6717, "step": 1519 }, { "epoch": 0.14793187347931874, "grad_norm": 1.736227335112512, "learning_rate": 9.640080196464032e-06, "loss": 0.6677, "step": 1520 }, { "epoch": 0.14802919708029197, "grad_norm": 1.5194007013329096, "learning_rate": 9.63949278467226e-06, "loss": 0.4288, "step": 1521 }, { "epoch": 0.1481265206812652, "grad_norm": 1.5063763039212688, "learning_rate": 9.638904911851237e-06, "loss": 0.4529, "step": 1522 }, { "epoch": 0.14822384428223845, "grad_norm": 1.6414876214129155, "learning_rate": 9.638316578059384e-06, "loss": 0.5482, "step": 1523 }, { "epoch": 0.14832116788321167, "grad_norm": 1.3122113228270877, "learning_rate": 9.63772778335516e-06, "loss": 0.3903, "step": 1524 }, { "epoch": 0.14841849148418493, "grad_norm": 1.6417051120393822, "learning_rate": 9.637138527797075e-06, "loss": 0.654, "step": 1525 }, { "epoch": 0.14851581508515815, "grad_norm": 1.2700043251684836, "learning_rate": 9.636548811443685e-06, "loss": 0.3338, "step": 1526 }, { "epoch": 0.14861313868613138, "grad_norm": 1.4124836827913858, "learning_rate": 9.63595863435359e-06, "loss": 0.3551, "step": 1527 }, { "epoch": 0.14871046228710463, "grad_norm": 1.3732601776051463, "learning_rate": 9.635367996585436e-06, "loss": 0.4212, "step": 1528 }, { "epoch": 0.14880778588807786, "grad_norm": 1.4785898006079692, "learning_rate": 9.634776898197916e-06, "loss": 0.416, "step": 1529 }, { "epoch": 0.14890510948905109, "grad_norm": 1.5889313350171215, "learning_rate": 9.634185339249766e-06, "loss": 0.5277, "step": 1530 }, { "epoch": 0.14900243309002434, "grad_norm": 1.7475817866143981, "learning_rate": 9.63359331979977e-06, "loss": 0.5202, "step": 1531 }, { "epoch": 0.14909975669099756, "grad_norm": 1.5329427899001755, "learning_rate": 9.633000839906758e-06, "loss": 0.4283, "step": 1532 }, { "epoch": 0.1491970802919708, "grad_norm": 1.3789605408265815, "learning_rate": 9.632407899629606e-06, "loss": 0.41, "step": 1533 }, { "epoch": 0.14929440389294404, "grad_norm": 1.725959361785896, "learning_rate": 9.631814499027233e-06, "loss": 0.6289, "step": 1534 }, { "epoch": 0.14939172749391727, "grad_norm": 1.5432692609357797, "learning_rate": 9.631220638158605e-06, "loss": 0.5, "step": 1535 }, { "epoch": 0.14948905109489052, "grad_norm": 1.6556108789068573, "learning_rate": 9.630626317082737e-06, "loss": 0.3819, "step": 1536 }, { "epoch": 0.14958637469586375, "grad_norm": 1.4498977098887442, "learning_rate": 9.630031535858686e-06, "loss": 0.4317, "step": 1537 }, { "epoch": 0.14968369829683698, "grad_norm": 1.1232180788321369, "learning_rate": 9.629436294545555e-06, "loss": 0.4004, "step": 1538 }, { "epoch": 0.14978102189781023, "grad_norm": 0.9949950497949807, "learning_rate": 9.628840593202494e-06, "loss": 0.2008, "step": 1539 }, { "epoch": 0.14987834549878346, "grad_norm": 1.431426278132333, "learning_rate": 9.628244431888699e-06, "loss": 0.3689, "step": 1540 }, { "epoch": 0.14997566909975668, "grad_norm": 1.575987397356523, "learning_rate": 9.627647810663407e-06, "loss": 0.5513, "step": 1541 }, { "epoch": 0.15007299270072993, "grad_norm": 1.5419042077794642, "learning_rate": 9.627050729585911e-06, "loss": 0.4614, "step": 1542 }, { "epoch": 0.15017031630170316, "grad_norm": 1.6695059275012083, "learning_rate": 9.626453188715539e-06, "loss": 0.5111, "step": 1543 }, { "epoch": 0.1502676399026764, "grad_norm": 1.5402255238707527, "learning_rate": 9.625855188111668e-06, "loss": 0.4209, "step": 1544 }, { "epoch": 0.15036496350364964, "grad_norm": 1.4645797288107798, "learning_rate": 9.625256727833726e-06, "loss": 0.4852, "step": 1545 }, { "epoch": 0.15046228710462287, "grad_norm": 2.0138530187225845, "learning_rate": 9.62465780794118e-06, "loss": 0.4272, "step": 1546 }, { "epoch": 0.15055961070559612, "grad_norm": 1.7939871096323345, "learning_rate": 9.624058428493543e-06, "loss": 0.3864, "step": 1547 }, { "epoch": 0.15065693430656935, "grad_norm": 1.5936734798237622, "learning_rate": 9.62345858955038e-06, "loss": 0.5951, "step": 1548 }, { "epoch": 0.15075425790754257, "grad_norm": 1.381736638575513, "learning_rate": 9.622858291171295e-06, "loss": 0.5078, "step": 1549 }, { "epoch": 0.15085158150851583, "grad_norm": 1.2680468052820635, "learning_rate": 9.622257533415939e-06, "loss": 0.3314, "step": 1550 }, { "epoch": 0.15094890510948905, "grad_norm": 1.5886359348363517, "learning_rate": 9.621656316344011e-06, "loss": 0.5985, "step": 1551 }, { "epoch": 0.15104622871046228, "grad_norm": 1.631001321245941, "learning_rate": 9.621054640015255e-06, "loss": 0.6297, "step": 1552 }, { "epoch": 0.15114355231143553, "grad_norm": 1.7004985330783402, "learning_rate": 9.62045250448946e-06, "loss": 0.5153, "step": 1553 }, { "epoch": 0.15124087591240876, "grad_norm": 1.414050528965644, "learning_rate": 9.619849909826457e-06, "loss": 0.2651, "step": 1554 }, { "epoch": 0.15133819951338198, "grad_norm": 1.3361950007111751, "learning_rate": 9.61924685608613e-06, "loss": 0.4179, "step": 1555 }, { "epoch": 0.15143552311435524, "grad_norm": 1.2305020766816175, "learning_rate": 9.618643343328404e-06, "loss": 0.3342, "step": 1556 }, { "epoch": 0.15153284671532846, "grad_norm": 1.3364057110807985, "learning_rate": 9.618039371613251e-06, "loss": 0.357, "step": 1557 }, { "epoch": 0.15163017031630172, "grad_norm": 0.9846564904659728, "learning_rate": 9.617434941000685e-06, "loss": 0.2278, "step": 1558 }, { "epoch": 0.15172749391727494, "grad_norm": 1.4874184978820846, "learning_rate": 9.616830051550772e-06, "loss": 0.4467, "step": 1559 }, { "epoch": 0.15182481751824817, "grad_norm": 1.79907754997464, "learning_rate": 9.61622470332362e-06, "loss": 0.5501, "step": 1560 }, { "epoch": 0.15192214111922142, "grad_norm": 1.2290536645357835, "learning_rate": 9.61561889637938e-06, "loss": 0.3149, "step": 1561 }, { "epoch": 0.15201946472019465, "grad_norm": 1.5048179340178087, "learning_rate": 9.615012630778254e-06, "loss": 0.5367, "step": 1562 }, { "epoch": 0.15211678832116787, "grad_norm": 1.387431259858161, "learning_rate": 9.614405906580486e-06, "loss": 0.4953, "step": 1563 }, { "epoch": 0.15221411192214113, "grad_norm": 1.4159610711473967, "learning_rate": 9.613798723846368e-06, "loss": 0.454, "step": 1564 }, { "epoch": 0.15231143552311435, "grad_norm": 1.2005509919566202, "learning_rate": 9.613191082636235e-06, "loss": 0.3945, "step": 1565 }, { "epoch": 0.15240875912408758, "grad_norm": 1.518451218591156, "learning_rate": 9.612582983010468e-06, "loss": 0.42, "step": 1566 }, { "epoch": 0.15250608272506083, "grad_norm": 1.2817177267697137, "learning_rate": 9.611974425029494e-06, "loss": 0.4119, "step": 1567 }, { "epoch": 0.15260340632603406, "grad_norm": 1.3182769071429, "learning_rate": 9.611365408753787e-06, "loss": 0.4301, "step": 1568 }, { "epoch": 0.1527007299270073, "grad_norm": 1.2668371165350867, "learning_rate": 9.610755934243864e-06, "loss": 0.3415, "step": 1569 }, { "epoch": 0.15279805352798054, "grad_norm": 1.334265705787435, "learning_rate": 9.610146001560293e-06, "loss": 0.325, "step": 1570 }, { "epoch": 0.15289537712895376, "grad_norm": 1.405628575667756, "learning_rate": 9.609535610763678e-06, "loss": 0.4, "step": 1571 }, { "epoch": 0.15299270072992702, "grad_norm": 1.5931859233666277, "learning_rate": 9.608924761914677e-06, "loss": 0.643, "step": 1572 }, { "epoch": 0.15309002433090024, "grad_norm": 1.323715339329346, "learning_rate": 9.608313455073989e-06, "loss": 0.4832, "step": 1573 }, { "epoch": 0.15318734793187347, "grad_norm": 1.1603088792271297, "learning_rate": 9.60770169030236e-06, "loss": 0.2684, "step": 1574 }, { "epoch": 0.15328467153284672, "grad_norm": 1.4578030666688024, "learning_rate": 9.607089467660581e-06, "loss": 0.4418, "step": 1575 }, { "epoch": 0.15338199513381995, "grad_norm": 1.2739086566679132, "learning_rate": 9.606476787209493e-06, "loss": 0.3847, "step": 1576 }, { "epoch": 0.15347931873479317, "grad_norm": 1.4031538044918876, "learning_rate": 9.605863649009973e-06, "loss": 0.3672, "step": 1577 }, { "epoch": 0.15357664233576643, "grad_norm": 1.473592849907526, "learning_rate": 9.605250053122951e-06, "loss": 0.3955, "step": 1578 }, { "epoch": 0.15367396593673965, "grad_norm": 1.6950520258208177, "learning_rate": 9.604635999609402e-06, "loss": 0.6923, "step": 1579 }, { "epoch": 0.1537712895377129, "grad_norm": 1.6074239515288835, "learning_rate": 9.604021488530342e-06, "loss": 0.4771, "step": 1580 }, { "epoch": 0.15386861313868613, "grad_norm": 1.5289432511411145, "learning_rate": 9.603406519946838e-06, "loss": 0.5881, "step": 1581 }, { "epoch": 0.15396593673965936, "grad_norm": 1.3225323677068181, "learning_rate": 9.602791093919998e-06, "loss": 0.3128, "step": 1582 }, { "epoch": 0.1540632603406326, "grad_norm": 1.467417498061456, "learning_rate": 9.60217521051098e-06, "loss": 0.545, "step": 1583 }, { "epoch": 0.15416058394160584, "grad_norm": 1.7568491012309082, "learning_rate": 9.60155886978098e-06, "loss": 0.7054, "step": 1584 }, { "epoch": 0.15425790754257906, "grad_norm": 1.5606257069028109, "learning_rate": 9.600942071791248e-06, "loss": 0.4329, "step": 1585 }, { "epoch": 0.15435523114355232, "grad_norm": 1.5727160833264413, "learning_rate": 9.600324816603074e-06, "loss": 0.6128, "step": 1586 }, { "epoch": 0.15445255474452554, "grad_norm": 1.3864503412663605, "learning_rate": 9.599707104277796e-06, "loss": 0.573, "step": 1587 }, { "epoch": 0.15454987834549877, "grad_norm": 1.4232761061254342, "learning_rate": 9.599088934876794e-06, "loss": 0.4136, "step": 1588 }, { "epoch": 0.15464720194647202, "grad_norm": 1.3399427727677786, "learning_rate": 9.598470308461499e-06, "loss": 0.257, "step": 1589 }, { "epoch": 0.15474452554744525, "grad_norm": 1.61635763649276, "learning_rate": 9.597851225093382e-06, "loss": 0.566, "step": 1590 }, { "epoch": 0.1548418491484185, "grad_norm": 1.6304164262097627, "learning_rate": 9.597231684833964e-06, "loss": 0.3673, "step": 1591 }, { "epoch": 0.15493917274939173, "grad_norm": 1.4592987498064005, "learning_rate": 9.596611687744807e-06, "loss": 0.5193, "step": 1592 }, { "epoch": 0.15503649635036496, "grad_norm": 1.4397292060019447, "learning_rate": 9.595991233887523e-06, "loss": 0.3236, "step": 1593 }, { "epoch": 0.1551338199513382, "grad_norm": 1.2246835494507005, "learning_rate": 9.595370323323763e-06, "loss": 0.2397, "step": 1594 }, { "epoch": 0.15523114355231143, "grad_norm": 1.530797619071646, "learning_rate": 9.59474895611523e-06, "loss": 0.4537, "step": 1595 }, { "epoch": 0.15532846715328466, "grad_norm": 0.9400393110536889, "learning_rate": 9.594127132323669e-06, "loss": 0.1899, "step": 1596 }, { "epoch": 0.15542579075425791, "grad_norm": 1.167634539806263, "learning_rate": 9.593504852010872e-06, "loss": 0.353, "step": 1597 }, { "epoch": 0.15552311435523114, "grad_norm": 1.6772160290018319, "learning_rate": 9.592882115238675e-06, "loss": 0.4194, "step": 1598 }, { "epoch": 0.15562043795620437, "grad_norm": 1.4391641520861267, "learning_rate": 9.592258922068958e-06, "loss": 0.4767, "step": 1599 }, { "epoch": 0.15571776155717762, "grad_norm": 1.544673007447179, "learning_rate": 9.591635272563648e-06, "loss": 0.3175, "step": 1600 }, { "epoch": 0.15581508515815085, "grad_norm": 1.4189512773822923, "learning_rate": 9.591011166784721e-06, "loss": 0.4834, "step": 1601 }, { "epoch": 0.1559124087591241, "grad_norm": 1.2414753149853184, "learning_rate": 9.590386604794191e-06, "loss": 0.3657, "step": 1602 }, { "epoch": 0.15600973236009733, "grad_norm": 1.0236785255419305, "learning_rate": 9.589761586654122e-06, "loss": 0.2011, "step": 1603 }, { "epoch": 0.15610705596107055, "grad_norm": 2.3461369884265357, "learning_rate": 9.589136112426625e-06, "loss": 0.4024, "step": 1604 }, { "epoch": 0.1562043795620438, "grad_norm": 1.2849479900774115, "learning_rate": 9.588510182173851e-06, "loss": 0.3527, "step": 1605 }, { "epoch": 0.15630170316301703, "grad_norm": 1.4153286655317308, "learning_rate": 9.587883795958001e-06, "loss": 0.4149, "step": 1606 }, { "epoch": 0.15639902676399026, "grad_norm": 1.6599870662874754, "learning_rate": 9.587256953841317e-06, "loss": 0.6479, "step": 1607 }, { "epoch": 0.1564963503649635, "grad_norm": 1.6670860080877101, "learning_rate": 9.58662965588609e-06, "loss": 0.5825, "step": 1608 }, { "epoch": 0.15659367396593674, "grad_norm": 1.7776280437765584, "learning_rate": 9.586001902154655e-06, "loss": 0.5798, "step": 1609 }, { "epoch": 0.15669099756690996, "grad_norm": 1.5456297515043347, "learning_rate": 9.585373692709391e-06, "loss": 0.4583, "step": 1610 }, { "epoch": 0.15678832116788322, "grad_norm": 1.8806083091738082, "learning_rate": 9.584745027612728e-06, "loss": 0.4736, "step": 1611 }, { "epoch": 0.15688564476885644, "grad_norm": 1.4790926453601037, "learning_rate": 9.584115906927131e-06, "loss": 0.4172, "step": 1612 }, { "epoch": 0.1569829683698297, "grad_norm": 3.3021500316987633, "learning_rate": 9.58348633071512e-06, "loss": 0.472, "step": 1613 }, { "epoch": 0.15708029197080292, "grad_norm": 1.860435632122749, "learning_rate": 9.582856299039253e-06, "loss": 0.4743, "step": 1614 }, { "epoch": 0.15717761557177615, "grad_norm": 1.7557070181222967, "learning_rate": 9.58222581196214e-06, "loss": 0.2907, "step": 1615 }, { "epoch": 0.1572749391727494, "grad_norm": 1.5588238003780286, "learning_rate": 9.581594869546433e-06, "loss": 0.3803, "step": 1616 }, { "epoch": 0.15737226277372263, "grad_norm": 1.5265824940366777, "learning_rate": 9.580963471854825e-06, "loss": 0.3163, "step": 1617 }, { "epoch": 0.15746958637469585, "grad_norm": 1.5425233608560427, "learning_rate": 9.580331618950063e-06, "loss": 0.3884, "step": 1618 }, { "epoch": 0.1575669099756691, "grad_norm": 1.4123635386488018, "learning_rate": 9.579699310894932e-06, "loss": 0.382, "step": 1619 }, { "epoch": 0.15766423357664233, "grad_norm": 1.578019469103596, "learning_rate": 9.579066547752266e-06, "loss": 0.4293, "step": 1620 }, { "epoch": 0.15776155717761559, "grad_norm": 1.6566990657429592, "learning_rate": 9.578433329584943e-06, "loss": 0.2878, "step": 1621 }, { "epoch": 0.1578588807785888, "grad_norm": 1.5290043771605026, "learning_rate": 9.577799656455886e-06, "loss": 0.4483, "step": 1622 }, { "epoch": 0.15795620437956204, "grad_norm": 1.7268752423292135, "learning_rate": 9.577165528428063e-06, "loss": 0.4805, "step": 1623 }, { "epoch": 0.1580535279805353, "grad_norm": 1.3495189675110832, "learning_rate": 9.576530945564488e-06, "loss": 0.3161, "step": 1624 }, { "epoch": 0.15815085158150852, "grad_norm": 1.4763829359235794, "learning_rate": 9.575895907928218e-06, "loss": 0.4825, "step": 1625 }, { "epoch": 0.15824817518248174, "grad_norm": 1.686991367686583, "learning_rate": 9.575260415582362e-06, "loss": 0.3016, "step": 1626 }, { "epoch": 0.158345498783455, "grad_norm": 1.3390220591470878, "learning_rate": 9.574624468590065e-06, "loss": 0.4523, "step": 1627 }, { "epoch": 0.15844282238442822, "grad_norm": 1.8698808087393168, "learning_rate": 9.573988067014523e-06, "loss": 0.5203, "step": 1628 }, { "epoch": 0.15854014598540145, "grad_norm": 1.4032165021732874, "learning_rate": 9.573351210918976e-06, "loss": 0.3678, "step": 1629 }, { "epoch": 0.1586374695863747, "grad_norm": 1.4017015011859046, "learning_rate": 9.572713900366707e-06, "loss": 0.2798, "step": 1630 }, { "epoch": 0.15873479318734793, "grad_norm": 1.4441030854971395, "learning_rate": 9.572076135421048e-06, "loss": 0.3514, "step": 1631 }, { "epoch": 0.15883211678832118, "grad_norm": 1.3629792761623065, "learning_rate": 9.571437916145373e-06, "loss": 0.4604, "step": 1632 }, { "epoch": 0.1589294403892944, "grad_norm": 1.376972344446985, "learning_rate": 9.570799242603101e-06, "loss": 0.4603, "step": 1633 }, { "epoch": 0.15902676399026763, "grad_norm": 1.5637421057827365, "learning_rate": 9.5701601148577e-06, "loss": 0.5575, "step": 1634 }, { "epoch": 0.1591240875912409, "grad_norm": 1.4338457681188446, "learning_rate": 9.56952053297268e-06, "loss": 0.532, "step": 1635 }, { "epoch": 0.1592214111922141, "grad_norm": 1.4858651962900338, "learning_rate": 9.568880497011597e-06, "loss": 0.4951, "step": 1636 }, { "epoch": 0.15931873479318734, "grad_norm": 1.543423201839799, "learning_rate": 9.568240007038048e-06, "loss": 0.5278, "step": 1637 }, { "epoch": 0.1594160583941606, "grad_norm": 1.408319688012345, "learning_rate": 9.567599063115683e-06, "loss": 0.4474, "step": 1638 }, { "epoch": 0.15951338199513382, "grad_norm": 1.2680346779127702, "learning_rate": 9.566957665308192e-06, "loss": 0.3351, "step": 1639 }, { "epoch": 0.15961070559610704, "grad_norm": 1.6277797838197976, "learning_rate": 9.56631581367931e-06, "loss": 0.3966, "step": 1640 }, { "epoch": 0.1597080291970803, "grad_norm": 1.5248977314161354, "learning_rate": 9.565673508292818e-06, "loss": 0.5211, "step": 1641 }, { "epoch": 0.15980535279805352, "grad_norm": 1.7164012466100764, "learning_rate": 9.565030749212546e-06, "loss": 0.5428, "step": 1642 }, { "epoch": 0.15990267639902678, "grad_norm": 1.6687081549609284, "learning_rate": 9.56438753650236e-06, "loss": 0.2936, "step": 1643 }, { "epoch": 0.16, "grad_norm": 1.5678110268585723, "learning_rate": 9.56374387022618e-06, "loss": 0.5166, "step": 1644 }, { "epoch": 0.16009732360097323, "grad_norm": 1.6983019931785335, "learning_rate": 9.563099750447966e-06, "loss": 0.4822, "step": 1645 }, { "epoch": 0.16019464720194648, "grad_norm": 1.4431824530543444, "learning_rate": 9.562455177231726e-06, "loss": 0.3212, "step": 1646 }, { "epoch": 0.1602919708029197, "grad_norm": 3.712828208723791, "learning_rate": 9.56181015064151e-06, "loss": 0.4286, "step": 1647 }, { "epoch": 0.16038929440389293, "grad_norm": 1.4388083433357408, "learning_rate": 9.561164670741416e-06, "loss": 0.3757, "step": 1648 }, { "epoch": 0.1604866180048662, "grad_norm": 2.4878081586110117, "learning_rate": 9.560518737595586e-06, "loss": 0.3494, "step": 1649 }, { "epoch": 0.16058394160583941, "grad_norm": 2.3091262745384706, "learning_rate": 9.559872351268205e-06, "loss": 0.4607, "step": 1650 }, { "epoch": 0.16068126520681264, "grad_norm": 1.6632563827899045, "learning_rate": 9.559225511823504e-06, "loss": 0.5718, "step": 1651 }, { "epoch": 0.1607785888077859, "grad_norm": 1.6138862417611177, "learning_rate": 9.558578219325763e-06, "loss": 0.325, "step": 1652 }, { "epoch": 0.16087591240875912, "grad_norm": 1.1933317040764397, "learning_rate": 9.557930473839303e-06, "loss": 0.339, "step": 1653 }, { "epoch": 0.16097323600973237, "grad_norm": 0.9728312200944081, "learning_rate": 9.55728227542849e-06, "loss": 0.2395, "step": 1654 }, { "epoch": 0.1610705596107056, "grad_norm": 1.5521742092214053, "learning_rate": 9.556633624157735e-06, "loss": 0.4613, "step": 1655 }, { "epoch": 0.16116788321167883, "grad_norm": 1.639740187603822, "learning_rate": 9.555984520091497e-06, "loss": 0.5146, "step": 1656 }, { "epoch": 0.16126520681265208, "grad_norm": 1.5387772039120604, "learning_rate": 9.555334963294277e-06, "loss": 0.4879, "step": 1657 }, { "epoch": 0.1613625304136253, "grad_norm": 1.2788374913210725, "learning_rate": 9.554684953830622e-06, "loss": 0.2115, "step": 1658 }, { "epoch": 0.16145985401459853, "grad_norm": 1.2466060338770748, "learning_rate": 9.554034491765123e-06, "loss": 0.4057, "step": 1659 }, { "epoch": 0.16155717761557178, "grad_norm": 1.3626765355526065, "learning_rate": 9.553383577162418e-06, "loss": 0.3922, "step": 1660 }, { "epoch": 0.161654501216545, "grad_norm": 1.4993759287568524, "learning_rate": 9.552732210087188e-06, "loss": 0.5101, "step": 1661 }, { "epoch": 0.16175182481751824, "grad_norm": 1.4132678080310175, "learning_rate": 9.55208039060416e-06, "loss": 0.4098, "step": 1662 }, { "epoch": 0.1618491484184915, "grad_norm": 1.3072203759845393, "learning_rate": 9.551428118778105e-06, "loss": 0.4437, "step": 1663 }, { "epoch": 0.16194647201946472, "grad_norm": 1.4197615961970556, "learning_rate": 9.550775394673841e-06, "loss": 0.4855, "step": 1664 }, { "epoch": 0.16204379562043797, "grad_norm": 1.1443578178578404, "learning_rate": 9.550122218356228e-06, "loss": 0.2651, "step": 1665 }, { "epoch": 0.1621411192214112, "grad_norm": 1.6274953169982382, "learning_rate": 9.549468589890173e-06, "loss": 0.5702, "step": 1666 }, { "epoch": 0.16223844282238442, "grad_norm": 1.5542252970145625, "learning_rate": 9.548814509340631e-06, "loss": 0.3618, "step": 1667 }, { "epoch": 0.16233576642335767, "grad_norm": 1.5872588267319008, "learning_rate": 9.548159976772593e-06, "loss": 0.5261, "step": 1668 }, { "epoch": 0.1624330900243309, "grad_norm": 1.1735078752446053, "learning_rate": 9.547504992251102e-06, "loss": 0.2709, "step": 1669 }, { "epoch": 0.16253041362530413, "grad_norm": 1.8057871189139236, "learning_rate": 9.546849555841247e-06, "loss": 0.3383, "step": 1670 }, { "epoch": 0.16262773722627738, "grad_norm": 1.4181568031561294, "learning_rate": 9.546193667608155e-06, "loss": 0.4654, "step": 1671 }, { "epoch": 0.1627250608272506, "grad_norm": 1.3372190697374011, "learning_rate": 9.545537327617004e-06, "loss": 0.4098, "step": 1672 }, { "epoch": 0.16282238442822383, "grad_norm": 1.4054977948345526, "learning_rate": 9.544880535933015e-06, "loss": 0.488, "step": 1673 }, { "epoch": 0.16291970802919709, "grad_norm": 1.8103202340533562, "learning_rate": 9.544223292621456e-06, "loss": 0.2989, "step": 1674 }, { "epoch": 0.1630170316301703, "grad_norm": 1.4424657055300307, "learning_rate": 9.543565597747633e-06, "loss": 0.3545, "step": 1675 }, { "epoch": 0.16311435523114357, "grad_norm": 1.712897793310079, "learning_rate": 9.542907451376904e-06, "loss": 0.4372, "step": 1676 }, { "epoch": 0.1632116788321168, "grad_norm": 1.5856342495538354, "learning_rate": 9.542248853574669e-06, "loss": 0.3552, "step": 1677 }, { "epoch": 0.16330900243309002, "grad_norm": 1.6070757988154845, "learning_rate": 9.541589804406373e-06, "loss": 0.6297, "step": 1678 }, { "epoch": 0.16340632603406327, "grad_norm": 1.4030835423791206, "learning_rate": 9.540930303937508e-06, "loss": 0.5304, "step": 1679 }, { "epoch": 0.1635036496350365, "grad_norm": 1.1629420270697914, "learning_rate": 9.540270352233607e-06, "loss": 0.3196, "step": 1680 }, { "epoch": 0.16360097323600972, "grad_norm": 1.6438421767465334, "learning_rate": 9.53960994936025e-06, "loss": 0.5718, "step": 1681 }, { "epoch": 0.16369829683698298, "grad_norm": 1.4972655485667212, "learning_rate": 9.538949095383064e-06, "loss": 0.5411, "step": 1682 }, { "epoch": 0.1637956204379562, "grad_norm": 1.6855463092047138, "learning_rate": 9.538287790367715e-06, "loss": 0.4072, "step": 1683 }, { "epoch": 0.16389294403892943, "grad_norm": 1.3024464622228382, "learning_rate": 9.537626034379918e-06, "loss": 0.3779, "step": 1684 }, { "epoch": 0.16399026763990268, "grad_norm": 1.295189693137423, "learning_rate": 9.536963827485435e-06, "loss": 0.3687, "step": 1685 }, { "epoch": 0.1640875912408759, "grad_norm": 1.4535138830119652, "learning_rate": 9.536301169750068e-06, "loss": 0.4548, "step": 1686 }, { "epoch": 0.16418491484184916, "grad_norm": 1.199213729997, "learning_rate": 9.535638061239663e-06, "loss": 0.2053, "step": 1687 }, { "epoch": 0.1642822384428224, "grad_norm": 1.5567691993981325, "learning_rate": 9.534974502020117e-06, "loss": 0.4098, "step": 1688 }, { "epoch": 0.1643795620437956, "grad_norm": 1.5701473016338705, "learning_rate": 9.534310492157368e-06, "loss": 0.4663, "step": 1689 }, { "epoch": 0.16447688564476887, "grad_norm": 1.4652608455665965, "learning_rate": 9.533646031717398e-06, "loss": 0.423, "step": 1690 }, { "epoch": 0.1645742092457421, "grad_norm": 1.556818972222242, "learning_rate": 9.532981120766235e-06, "loss": 0.5823, "step": 1691 }, { "epoch": 0.16467153284671532, "grad_norm": 1.3176167070500389, "learning_rate": 9.532315759369953e-06, "loss": 0.3369, "step": 1692 }, { "epoch": 0.16476885644768857, "grad_norm": 1.710131590392248, "learning_rate": 9.531649947594668e-06, "loss": 0.6235, "step": 1693 }, { "epoch": 0.1648661800486618, "grad_norm": 1.316452070848038, "learning_rate": 9.53098368550654e-06, "loss": 0.2773, "step": 1694 }, { "epoch": 0.16496350364963502, "grad_norm": 1.3144552108952152, "learning_rate": 9.53031697317178e-06, "loss": 0.4008, "step": 1695 }, { "epoch": 0.16506082725060828, "grad_norm": 1.6242845867808264, "learning_rate": 9.529649810656638e-06, "loss": 0.4994, "step": 1696 }, { "epoch": 0.1651581508515815, "grad_norm": 1.285181340955318, "learning_rate": 9.52898219802741e-06, "loss": 0.3565, "step": 1697 }, { "epoch": 0.16525547445255476, "grad_norm": 1.5859120183692204, "learning_rate": 9.528314135350439e-06, "loss": 0.6057, "step": 1698 }, { "epoch": 0.16535279805352798, "grad_norm": 1.2413369391689792, "learning_rate": 9.527645622692105e-06, "loss": 0.2912, "step": 1699 }, { "epoch": 0.1654501216545012, "grad_norm": 1.5626898078072964, "learning_rate": 9.526976660118846e-06, "loss": 0.4912, "step": 1700 }, { "epoch": 0.16554744525547446, "grad_norm": 1.355302168314411, "learning_rate": 9.526307247697133e-06, "loss": 0.4066, "step": 1701 }, { "epoch": 0.1656447688564477, "grad_norm": 1.6754743388370108, "learning_rate": 9.525637385493485e-06, "loss": 0.4402, "step": 1702 }, { "epoch": 0.16574209245742091, "grad_norm": 1.4378330010865907, "learning_rate": 9.524967073574468e-06, "loss": 0.3896, "step": 1703 }, { "epoch": 0.16583941605839417, "grad_norm": 1.5562357645264613, "learning_rate": 9.524296312006696e-06, "loss": 0.7178, "step": 1704 }, { "epoch": 0.1659367396593674, "grad_norm": 1.4997676033555023, "learning_rate": 9.523625100856814e-06, "loss": 0.5203, "step": 1705 }, { "epoch": 0.16603406326034062, "grad_norm": 1.39039181243628, "learning_rate": 9.522953440191528e-06, "loss": 0.4804, "step": 1706 }, { "epoch": 0.16613138686131387, "grad_norm": 1.2594698773182105, "learning_rate": 9.522281330077579e-06, "loss": 0.31, "step": 1707 }, { "epoch": 0.1662287104622871, "grad_norm": 1.5394103920539104, "learning_rate": 9.521608770581751e-06, "loss": 0.4579, "step": 1708 }, { "epoch": 0.16632603406326035, "grad_norm": 1.4703967014570463, "learning_rate": 9.520935761770885e-06, "loss": 0.4732, "step": 1709 }, { "epoch": 0.16642335766423358, "grad_norm": 1.0444153315520046, "learning_rate": 9.520262303711851e-06, "loss": 0.2468, "step": 1710 }, { "epoch": 0.1665206812652068, "grad_norm": 1.4440019594110525, "learning_rate": 9.519588396471572e-06, "loss": 0.4979, "step": 1711 }, { "epoch": 0.16661800486618006, "grad_norm": 1.6467368949298022, "learning_rate": 9.518914040117018e-06, "loss": 0.603, "step": 1712 }, { "epoch": 0.16671532846715328, "grad_norm": 1.656027868957794, "learning_rate": 9.518239234715198e-06, "loss": 0.3534, "step": 1713 }, { "epoch": 0.1668126520681265, "grad_norm": 1.409360793352949, "learning_rate": 9.517563980333169e-06, "loss": 0.4442, "step": 1714 }, { "epoch": 0.16690997566909976, "grad_norm": 1.4429795690770129, "learning_rate": 9.51688827703803e-06, "loss": 0.4347, "step": 1715 }, { "epoch": 0.167007299270073, "grad_norm": 1.2256612199861667, "learning_rate": 9.516212124896926e-06, "loss": 0.3582, "step": 1716 }, { "epoch": 0.16710462287104622, "grad_norm": 1.340106815948813, "learning_rate": 9.515535523977047e-06, "loss": 0.4494, "step": 1717 }, { "epoch": 0.16720194647201947, "grad_norm": 1.8033632646616307, "learning_rate": 9.514858474345628e-06, "loss": 0.7254, "step": 1718 }, { "epoch": 0.1672992700729927, "grad_norm": 1.461471704742246, "learning_rate": 9.514180976069948e-06, "loss": 0.4431, "step": 1719 }, { "epoch": 0.16739659367396595, "grad_norm": 1.8149337871023152, "learning_rate": 9.513503029217329e-06, "loss": 0.6808, "step": 1720 }, { "epoch": 0.16749391727493917, "grad_norm": 1.4317488687976054, "learning_rate": 9.51282463385514e-06, "loss": 0.3969, "step": 1721 }, { "epoch": 0.1675912408759124, "grad_norm": 1.406660867644435, "learning_rate": 9.512145790050793e-06, "loss": 0.4466, "step": 1722 }, { "epoch": 0.16768856447688565, "grad_norm": 1.5087949092220858, "learning_rate": 9.511466497871747e-06, "loss": 0.3588, "step": 1723 }, { "epoch": 0.16778588807785888, "grad_norm": 1.3780878680496882, "learning_rate": 9.5107867573855e-06, "loss": 0.4136, "step": 1724 }, { "epoch": 0.1678832116788321, "grad_norm": 1.1785521443758606, "learning_rate": 9.510106568659601e-06, "loss": 0.3319, "step": 1725 }, { "epoch": 0.16798053527980536, "grad_norm": 1.4150065437408217, "learning_rate": 9.50942593176164e-06, "loss": 0.3619, "step": 1726 }, { "epoch": 0.16807785888077859, "grad_norm": 1.5810685607791577, "learning_rate": 9.508744846759254e-06, "loss": 0.5204, "step": 1727 }, { "epoch": 0.1681751824817518, "grad_norm": 1.5507123725258296, "learning_rate": 9.50806331372012e-06, "loss": 0.3017, "step": 1728 }, { "epoch": 0.16827250608272507, "grad_norm": 1.7448176899198176, "learning_rate": 9.507381332711963e-06, "loss": 0.6488, "step": 1729 }, { "epoch": 0.1683698296836983, "grad_norm": 2.0203041353812243, "learning_rate": 9.506698903802553e-06, "loss": 0.2868, "step": 1730 }, { "epoch": 0.16846715328467154, "grad_norm": 1.425557408986151, "learning_rate": 9.506016027059703e-06, "loss": 0.4181, "step": 1731 }, { "epoch": 0.16856447688564477, "grad_norm": 1.658389742609111, "learning_rate": 9.505332702551272e-06, "loss": 0.4834, "step": 1732 }, { "epoch": 0.168661800486618, "grad_norm": 1.6313220070332846, "learning_rate": 9.50464893034516e-06, "loss": 0.6351, "step": 1733 }, { "epoch": 0.16875912408759125, "grad_norm": 1.4860828412814417, "learning_rate": 9.503964710509314e-06, "loss": 0.384, "step": 1734 }, { "epoch": 0.16885644768856448, "grad_norm": 1.5665989326823084, "learning_rate": 9.503280043111729e-06, "loss": 0.5031, "step": 1735 }, { "epoch": 0.1689537712895377, "grad_norm": 1.2627591310970376, "learning_rate": 9.502594928220437e-06, "loss": 0.3557, "step": 1736 }, { "epoch": 0.16905109489051096, "grad_norm": 1.6101827723851228, "learning_rate": 9.50190936590352e-06, "loss": 0.3886, "step": 1737 }, { "epoch": 0.16914841849148418, "grad_norm": 1.190927027644026, "learning_rate": 9.5012233562291e-06, "loss": 0.3, "step": 1738 }, { "epoch": 0.1692457420924574, "grad_norm": 1.6452233677093766, "learning_rate": 9.50053689926535e-06, "loss": 0.5808, "step": 1739 }, { "epoch": 0.16934306569343066, "grad_norm": 1.607284224817037, "learning_rate": 9.499849995080482e-06, "loss": 0.5726, "step": 1740 }, { "epoch": 0.1694403892944039, "grad_norm": 1.360873175063302, "learning_rate": 9.499162643742754e-06, "loss": 0.3294, "step": 1741 }, { "epoch": 0.16953771289537714, "grad_norm": 1.6205396325650636, "learning_rate": 9.49847484532047e-06, "loss": 0.5496, "step": 1742 }, { "epoch": 0.16963503649635037, "grad_norm": 1.6677491090337848, "learning_rate": 9.497786599881973e-06, "loss": 0.5745, "step": 1743 }, { "epoch": 0.1697323600973236, "grad_norm": 1.4765151889225172, "learning_rate": 9.497097907495658e-06, "loss": 0.3552, "step": 1744 }, { "epoch": 0.16982968369829685, "grad_norm": 1.4991516257283077, "learning_rate": 9.496408768229962e-06, "loss": 0.6004, "step": 1745 }, { "epoch": 0.16992700729927007, "grad_norm": 1.394241003611109, "learning_rate": 9.49571918215336e-06, "loss": 0.4166, "step": 1746 }, { "epoch": 0.1700243309002433, "grad_norm": 1.2418310265706307, "learning_rate": 9.495029149334381e-06, "loss": 0.3754, "step": 1747 }, { "epoch": 0.17012165450121655, "grad_norm": 1.7344174079178016, "learning_rate": 9.494338669841592e-06, "loss": 0.6136, "step": 1748 }, { "epoch": 0.17021897810218978, "grad_norm": 1.689754745813109, "learning_rate": 9.493647743743605e-06, "loss": 0.3066, "step": 1749 }, { "epoch": 0.170316301703163, "grad_norm": 1.5986274434851808, "learning_rate": 9.492956371109083e-06, "loss": 0.6476, "step": 1750 }, { "epoch": 0.17041362530413626, "grad_norm": 1.3892856963539753, "learning_rate": 9.492264552006725e-06, "loss": 0.2438, "step": 1751 }, { "epoch": 0.17051094890510948, "grad_norm": 1.3744062095245357, "learning_rate": 9.491572286505275e-06, "loss": 0.4154, "step": 1752 }, { "epoch": 0.17060827250608274, "grad_norm": 1.3041989445373636, "learning_rate": 9.490879574673528e-06, "loss": 0.3603, "step": 1753 }, { "epoch": 0.17070559610705596, "grad_norm": 1.2198251236981021, "learning_rate": 9.490186416580317e-06, "loss": 0.3382, "step": 1754 }, { "epoch": 0.1708029197080292, "grad_norm": 1.0699077871285796, "learning_rate": 9.489492812294521e-06, "loss": 0.2805, "step": 1755 }, { "epoch": 0.17090024330900244, "grad_norm": 1.8289792925797566, "learning_rate": 9.488798761885064e-06, "loss": 0.2551, "step": 1756 }, { "epoch": 0.17099756690997567, "grad_norm": 1.5156970449411904, "learning_rate": 9.488104265420917e-06, "loss": 0.5468, "step": 1757 }, { "epoch": 0.1710948905109489, "grad_norm": 1.3669899498040559, "learning_rate": 9.487409322971089e-06, "loss": 0.4705, "step": 1758 }, { "epoch": 0.17119221411192215, "grad_norm": 1.4212977316967985, "learning_rate": 9.486713934604638e-06, "loss": 0.5259, "step": 1759 }, { "epoch": 0.17128953771289537, "grad_norm": 1.3256503218660822, "learning_rate": 9.486018100390668e-06, "loss": 0.3825, "step": 1760 }, { "epoch": 0.17138686131386863, "grad_norm": 1.3448672418414023, "learning_rate": 9.485321820398321e-06, "loss": 0.4984, "step": 1761 }, { "epoch": 0.17148418491484185, "grad_norm": 1.3293122762885854, "learning_rate": 9.484625094696788e-06, "loss": 0.4419, "step": 1762 }, { "epoch": 0.17158150851581508, "grad_norm": 1.5749728003681251, "learning_rate": 9.483927923355303e-06, "loss": 0.4512, "step": 1763 }, { "epoch": 0.17167883211678833, "grad_norm": 2.1875449039755, "learning_rate": 9.483230306443144e-06, "loss": 0.4606, "step": 1764 }, { "epoch": 0.17177615571776156, "grad_norm": 1.4675466599593059, "learning_rate": 9.482532244029632e-06, "loss": 0.5098, "step": 1765 }, { "epoch": 0.17187347931873478, "grad_norm": 1.4395657499189969, "learning_rate": 9.481833736184137e-06, "loss": 0.4196, "step": 1766 }, { "epoch": 0.17197080291970804, "grad_norm": 1.6202346179751734, "learning_rate": 9.48113478297607e-06, "loss": 0.4083, "step": 1767 }, { "epoch": 0.17206812652068126, "grad_norm": 1.943359375, "learning_rate": 9.480435384474884e-06, "loss": 0.3829, "step": 1768 }, { "epoch": 0.1721654501216545, "grad_norm": 1.3957800309361543, "learning_rate": 9.47973554075008e-06, "loss": 0.4776, "step": 1769 }, { "epoch": 0.17226277372262774, "grad_norm": 1.3277740014703983, "learning_rate": 9.479035251871202e-06, "loss": 0.2944, "step": 1770 }, { "epoch": 0.17236009732360097, "grad_norm": 1.5955109684829234, "learning_rate": 9.478334517907838e-06, "loss": 0.4713, "step": 1771 }, { "epoch": 0.17245742092457422, "grad_norm": 1.209763677864614, "learning_rate": 9.477633338929621e-06, "loss": 0.2925, "step": 1772 }, { "epoch": 0.17255474452554745, "grad_norm": 1.6082316661319236, "learning_rate": 9.476931715006225e-06, "loss": 0.6037, "step": 1773 }, { "epoch": 0.17265206812652067, "grad_norm": 1.5310145605828824, "learning_rate": 9.476229646207375e-06, "loss": 0.427, "step": 1774 }, { "epoch": 0.17274939172749393, "grad_norm": 1.5161322305327478, "learning_rate": 9.475527132602833e-06, "loss": 0.5765, "step": 1775 }, { "epoch": 0.17284671532846715, "grad_norm": 1.5515912532543141, "learning_rate": 9.47482417426241e-06, "loss": 0.4693, "step": 1776 }, { "epoch": 0.17294403892944038, "grad_norm": 1.273583152257964, "learning_rate": 9.474120771255956e-06, "loss": 0.401, "step": 1777 }, { "epoch": 0.17304136253041363, "grad_norm": 1.3058387108850102, "learning_rate": 9.473416923653373e-06, "loss": 0.4651, "step": 1778 }, { "epoch": 0.17313868613138686, "grad_norm": 1.4876685295483647, "learning_rate": 9.472712631524599e-06, "loss": 0.5423, "step": 1779 }, { "epoch": 0.17323600973236009, "grad_norm": 1.4134646674679987, "learning_rate": 9.472007894939624e-06, "loss": 0.448, "step": 1780 }, { "epoch": 0.17333333333333334, "grad_norm": 1.3805564537318322, "learning_rate": 9.471302713968473e-06, "loss": 0.2429, "step": 1781 }, { "epoch": 0.17343065693430657, "grad_norm": 1.4256414475552066, "learning_rate": 9.470597088681225e-06, "loss": 0.4821, "step": 1782 }, { "epoch": 0.17352798053527982, "grad_norm": 1.2857804565204727, "learning_rate": 9.469891019147996e-06, "loss": 0.3177, "step": 1783 }, { "epoch": 0.17362530413625304, "grad_norm": 1.7384422656290006, "learning_rate": 9.46918450543895e-06, "loss": 0.6144, "step": 1784 }, { "epoch": 0.17372262773722627, "grad_norm": 1.3733947226466707, "learning_rate": 9.46847754762429e-06, "loss": 0.3777, "step": 1785 }, { "epoch": 0.17381995133819952, "grad_norm": 1.090627736959876, "learning_rate": 9.467770145774271e-06, "loss": 0.307, "step": 1786 }, { "epoch": 0.17391727493917275, "grad_norm": 1.5306242617959314, "learning_rate": 9.467062299959187e-06, "loss": 0.4652, "step": 1787 }, { "epoch": 0.17401459854014598, "grad_norm": 1.6335244702718128, "learning_rate": 9.466354010249375e-06, "loss": 0.5127, "step": 1788 }, { "epoch": 0.17411192214111923, "grad_norm": 1.3582351114688258, "learning_rate": 9.465645276715221e-06, "loss": 0.4213, "step": 1789 }, { "epoch": 0.17420924574209246, "grad_norm": 1.4962342995542501, "learning_rate": 9.464936099427151e-06, "loss": 0.4327, "step": 1790 }, { "epoch": 0.17430656934306568, "grad_norm": 1.513533102257641, "learning_rate": 9.464226478455636e-06, "loss": 0.4527, "step": 1791 }, { "epoch": 0.17440389294403894, "grad_norm": 1.4174664240767785, "learning_rate": 9.463516413871193e-06, "loss": 0.4986, "step": 1792 }, { "epoch": 0.17450121654501216, "grad_norm": 1.283758777636687, "learning_rate": 9.46280590574438e-06, "loss": 0.4648, "step": 1793 }, { "epoch": 0.17459854014598541, "grad_norm": 1.3960565511895506, "learning_rate": 9.4620949541458e-06, "loss": 0.3587, "step": 1794 }, { "epoch": 0.17469586374695864, "grad_norm": 1.6199676647020385, "learning_rate": 9.461383559146104e-06, "loss": 0.5292, "step": 1795 }, { "epoch": 0.17479318734793187, "grad_norm": 1.5028051531717803, "learning_rate": 9.46067172081598e-06, "loss": 0.4903, "step": 1796 }, { "epoch": 0.17489051094890512, "grad_norm": 1.683063467822515, "learning_rate": 9.459959439226165e-06, "loss": 0.3106, "step": 1797 }, { "epoch": 0.17498783454987835, "grad_norm": 1.3296224342860092, "learning_rate": 9.459246714447439e-06, "loss": 0.409, "step": 1798 }, { "epoch": 0.17508515815085157, "grad_norm": 1.5847151231323486, "learning_rate": 9.458533546550628e-06, "loss": 0.4169, "step": 1799 }, { "epoch": 0.17518248175182483, "grad_norm": 1.495253204796384, "learning_rate": 9.457819935606596e-06, "loss": 0.3753, "step": 1800 }, { "epoch": 0.17527980535279805, "grad_norm": 1.4723876384358174, "learning_rate": 9.45710588168626e-06, "loss": 0.2437, "step": 1801 }, { "epoch": 0.17537712895377128, "grad_norm": 1.5610295815557715, "learning_rate": 9.45639138486057e-06, "loss": 0.5651, "step": 1802 }, { "epoch": 0.17547445255474453, "grad_norm": 1.5876154549734276, "learning_rate": 9.45567644520053e-06, "loss": 0.4835, "step": 1803 }, { "epoch": 0.17557177615571776, "grad_norm": 1.5619759252942187, "learning_rate": 9.454961062777181e-06, "loss": 0.3036, "step": 1804 }, { "epoch": 0.175669099756691, "grad_norm": 1.0144029160546408, "learning_rate": 9.454245237661617e-06, "loss": 0.219, "step": 1805 }, { "epoch": 0.17576642335766424, "grad_norm": 1.717922774563162, "learning_rate": 9.453528969924963e-06, "loss": 0.5388, "step": 1806 }, { "epoch": 0.17586374695863746, "grad_norm": 1.288743598100688, "learning_rate": 9.452812259638399e-06, "loss": 0.4171, "step": 1807 }, { "epoch": 0.17596107055961072, "grad_norm": 1.951279890184611, "learning_rate": 9.452095106873142e-06, "loss": 0.3823, "step": 1808 }, { "epoch": 0.17605839416058394, "grad_norm": 1.352467757455935, "learning_rate": 9.45137751170046e-06, "loss": 0.3137, "step": 1809 }, { "epoch": 0.17615571776155717, "grad_norm": 1.3883395327139227, "learning_rate": 9.450659474191658e-06, "loss": 0.4878, "step": 1810 }, { "epoch": 0.17625304136253042, "grad_norm": 1.5658708451700805, "learning_rate": 9.449940994418088e-06, "loss": 0.523, "step": 1811 }, { "epoch": 0.17635036496350365, "grad_norm": 1.215080164631292, "learning_rate": 9.449222072451147e-06, "loss": 0.3773, "step": 1812 }, { "epoch": 0.17644768856447687, "grad_norm": 1.524331324578441, "learning_rate": 9.448502708362273e-06, "loss": 0.539, "step": 1813 }, { "epoch": 0.17654501216545013, "grad_norm": 1.6985132616371517, "learning_rate": 9.447782902222951e-06, "loss": 0.6344, "step": 1814 }, { "epoch": 0.17664233576642335, "grad_norm": 1.394156226586294, "learning_rate": 9.447062654104708e-06, "loss": 0.4136, "step": 1815 }, { "epoch": 0.1767396593673966, "grad_norm": 1.0359913462457855, "learning_rate": 9.446341964079116e-06, "loss": 0.2471, "step": 1816 }, { "epoch": 0.17683698296836983, "grad_norm": 1.6379291001324041, "learning_rate": 9.44562083221779e-06, "loss": 0.4648, "step": 1817 }, { "epoch": 0.17693430656934306, "grad_norm": 1.0926982727654353, "learning_rate": 9.44489925859239e-06, "loss": 0.253, "step": 1818 }, { "epoch": 0.1770316301703163, "grad_norm": 1.3396314447206463, "learning_rate": 9.444177243274619e-06, "loss": 0.4053, "step": 1819 }, { "epoch": 0.17712895377128954, "grad_norm": 1.2170984864894128, "learning_rate": 9.44345478633622e-06, "loss": 0.3483, "step": 1820 }, { "epoch": 0.17722627737226276, "grad_norm": 1.9241463489982464, "learning_rate": 9.442731887848993e-06, "loss": 0.7875, "step": 1821 }, { "epoch": 0.17732360097323602, "grad_norm": 1.7367037011857493, "learning_rate": 9.442008547884765e-06, "loss": 0.5423, "step": 1822 }, { "epoch": 0.17742092457420924, "grad_norm": 1.7768925691501514, "learning_rate": 9.441284766515417e-06, "loss": 0.5332, "step": 1823 }, { "epoch": 0.17751824817518247, "grad_norm": 1.544872490519166, "learning_rate": 9.440560543812872e-06, "loss": 0.4797, "step": 1824 }, { "epoch": 0.17761557177615572, "grad_norm": 1.3959412272112985, "learning_rate": 9.439835879849097e-06, "loss": 0.2813, "step": 1825 }, { "epoch": 0.17771289537712895, "grad_norm": 1.4333698815114406, "learning_rate": 9.439110774696101e-06, "loss": 0.4623, "step": 1826 }, { "epoch": 0.1778102189781022, "grad_norm": 1.4483549520432324, "learning_rate": 9.43838522842594e-06, "loss": 0.3718, "step": 1827 }, { "epoch": 0.17790754257907543, "grad_norm": 1.1321375447475677, "learning_rate": 9.43765924111071e-06, "loss": 0.3035, "step": 1828 }, { "epoch": 0.17800486618004865, "grad_norm": 1.362326738732822, "learning_rate": 9.436932812822554e-06, "loss": 0.316, "step": 1829 }, { "epoch": 0.1781021897810219, "grad_norm": 1.460799021966237, "learning_rate": 9.436205943633656e-06, "loss": 0.3911, "step": 1830 }, { "epoch": 0.17819951338199513, "grad_norm": 1.5389161016090995, "learning_rate": 9.435478633616247e-06, "loss": 0.5521, "step": 1831 }, { "epoch": 0.17829683698296836, "grad_norm": 1.5219868331018827, "learning_rate": 9.4347508828426e-06, "loss": 0.5027, "step": 1832 }, { "epoch": 0.1783941605839416, "grad_norm": 1.245087028586955, "learning_rate": 9.434022691385034e-06, "loss": 0.2981, "step": 1833 }, { "epoch": 0.17849148418491484, "grad_norm": 1.4557548434245557, "learning_rate": 9.433294059315905e-06, "loss": 0.2293, "step": 1834 }, { "epoch": 0.17858880778588807, "grad_norm": 1.3081558633618169, "learning_rate": 9.432564986707621e-06, "loss": 0.4217, "step": 1835 }, { "epoch": 0.17868613138686132, "grad_norm": 1.3513560054673133, "learning_rate": 9.43183547363263e-06, "loss": 0.4318, "step": 1836 }, { "epoch": 0.17878345498783454, "grad_norm": 1.3315264956466353, "learning_rate": 9.431105520163426e-06, "loss": 0.3781, "step": 1837 }, { "epoch": 0.1788807785888078, "grad_norm": 1.0550787306059675, "learning_rate": 9.430375126372542e-06, "loss": 0.3104, "step": 1838 }, { "epoch": 0.17897810218978102, "grad_norm": 1.3337629142786684, "learning_rate": 9.429644292332557e-06, "loss": 0.3455, "step": 1839 }, { "epoch": 0.17907542579075425, "grad_norm": 1.6239197882024916, "learning_rate": 9.428913018116098e-06, "loss": 0.5855, "step": 1840 }, { "epoch": 0.1791727493917275, "grad_norm": 1.3780162846249417, "learning_rate": 9.428181303795828e-06, "loss": 0.3643, "step": 1841 }, { "epoch": 0.17927007299270073, "grad_norm": 1.3478310292007554, "learning_rate": 9.42744914944446e-06, "loss": 0.3962, "step": 1842 }, { "epoch": 0.17936739659367396, "grad_norm": 1.5440243743593307, "learning_rate": 9.426716555134751e-06, "loss": 0.6193, "step": 1843 }, { "epoch": 0.1794647201946472, "grad_norm": 1.4878960058265709, "learning_rate": 9.425983520939495e-06, "loss": 0.473, "step": 1844 }, { "epoch": 0.17956204379562044, "grad_norm": 1.672460221871015, "learning_rate": 9.425250046931539e-06, "loss": 0.6429, "step": 1845 }, { "epoch": 0.17965936739659366, "grad_norm": 1.6015212635221012, "learning_rate": 9.424516133183762e-06, "loss": 0.3195, "step": 1846 }, { "epoch": 0.17975669099756691, "grad_norm": 1.385761715171386, "learning_rate": 9.4237817797691e-06, "loss": 0.4054, "step": 1847 }, { "epoch": 0.17985401459854014, "grad_norm": 1.386847906411032, "learning_rate": 9.423046986760522e-06, "loss": 0.3825, "step": 1848 }, { "epoch": 0.1799513381995134, "grad_norm": 1.087510047515406, "learning_rate": 9.422311754231047e-06, "loss": 0.3213, "step": 1849 }, { "epoch": 0.18004866180048662, "grad_norm": 1.6065416301387576, "learning_rate": 9.421576082253734e-06, "loss": 0.5062, "step": 1850 }, { "epoch": 0.18014598540145985, "grad_norm": 1.34096451308299, "learning_rate": 9.42083997090169e-06, "loss": 0.4036, "step": 1851 }, { "epoch": 0.1802433090024331, "grad_norm": 1.2557739418598393, "learning_rate": 9.42010342024806e-06, "loss": 0.3595, "step": 1852 }, { "epoch": 0.18034063260340633, "grad_norm": 1.5281441778996137, "learning_rate": 9.419366430366035e-06, "loss": 0.604, "step": 1853 }, { "epoch": 0.18043795620437955, "grad_norm": 1.2665309724570952, "learning_rate": 9.418629001328852e-06, "loss": 0.4205, "step": 1854 }, { "epoch": 0.1805352798053528, "grad_norm": 1.3442942382162348, "learning_rate": 9.417891133209789e-06, "loss": 0.3457, "step": 1855 }, { "epoch": 0.18063260340632603, "grad_norm": 1.4106593198915445, "learning_rate": 9.417152826082169e-06, "loss": 0.4812, "step": 1856 }, { "epoch": 0.18072992700729926, "grad_norm": 1.4377180846268287, "learning_rate": 9.416414080019359e-06, "loss": 0.4618, "step": 1857 }, { "epoch": 0.1808272506082725, "grad_norm": 3.1493721230250182, "learning_rate": 9.415674895094765e-06, "loss": 0.4636, "step": 1858 }, { "epoch": 0.18092457420924574, "grad_norm": 1.2019926414899231, "learning_rate": 9.414935271381844e-06, "loss": 0.3081, "step": 1859 }, { "epoch": 0.181021897810219, "grad_norm": 2.6470194483303042, "learning_rate": 9.41419520895409e-06, "loss": 0.545, "step": 1860 }, { "epoch": 0.18111922141119222, "grad_norm": 1.2980614715591199, "learning_rate": 9.413454707885048e-06, "loss": 0.2964, "step": 1861 }, { "epoch": 0.18121654501216544, "grad_norm": 1.0776172492719038, "learning_rate": 9.412713768248296e-06, "loss": 0.3014, "step": 1862 }, { "epoch": 0.1813138686131387, "grad_norm": 1.6105644497131084, "learning_rate": 9.411972390117466e-06, "loss": 0.2939, "step": 1863 }, { "epoch": 0.18141119221411192, "grad_norm": 1.5656908641978677, "learning_rate": 9.411230573566227e-06, "loss": 0.5202, "step": 1864 }, { "epoch": 0.18150851581508515, "grad_norm": 1.303806212869287, "learning_rate": 9.410488318668294e-06, "loss": 0.333, "step": 1865 }, { "epoch": 0.1816058394160584, "grad_norm": 1.6655746538236336, "learning_rate": 9.409745625497427e-06, "loss": 0.432, "step": 1866 }, { "epoch": 0.18170316301703163, "grad_norm": 1.3843667729738216, "learning_rate": 9.409002494127427e-06, "loss": 0.3721, "step": 1867 }, { "epoch": 0.18180048661800485, "grad_norm": 1.119511993732411, "learning_rate": 9.408258924632139e-06, "loss": 0.3344, "step": 1868 }, { "epoch": 0.1818978102189781, "grad_norm": 1.402581324947916, "learning_rate": 9.407514917085451e-06, "loss": 0.4016, "step": 1869 }, { "epoch": 0.18199513381995133, "grad_norm": 1.424239738841203, "learning_rate": 9.406770471561298e-06, "loss": 0.4043, "step": 1870 }, { "epoch": 0.18209245742092459, "grad_norm": 1.4825401610777273, "learning_rate": 9.406025588133654e-06, "loss": 0.5446, "step": 1871 }, { "epoch": 0.1821897810218978, "grad_norm": 1.1812973154269832, "learning_rate": 9.405280266876539e-06, "loss": 0.3086, "step": 1872 }, { "epoch": 0.18228710462287104, "grad_norm": 1.458454653825207, "learning_rate": 9.404534507864015e-06, "loss": 0.426, "step": 1873 }, { "epoch": 0.1823844282238443, "grad_norm": 1.4345175445802738, "learning_rate": 9.403788311170193e-06, "loss": 0.4826, "step": 1874 }, { "epoch": 0.18248175182481752, "grad_norm": 1.636664123351898, "learning_rate": 9.403041676869217e-06, "loss": 0.5861, "step": 1875 }, { "epoch": 0.18257907542579074, "grad_norm": 1.4112207510715695, "learning_rate": 9.402294605035285e-06, "loss": 0.3575, "step": 1876 }, { "epoch": 0.182676399026764, "grad_norm": 1.5632317164864975, "learning_rate": 9.401547095742631e-06, "loss": 0.5798, "step": 1877 }, { "epoch": 0.18277372262773722, "grad_norm": 1.2700759423445944, "learning_rate": 9.400799149065538e-06, "loss": 0.3928, "step": 1878 }, { "epoch": 0.18287104622871045, "grad_norm": 1.1318646905388465, "learning_rate": 9.400050765078327e-06, "loss": 0.2783, "step": 1879 }, { "epoch": 0.1829683698296837, "grad_norm": 1.1697084872304198, "learning_rate": 9.399301943855368e-06, "loss": 0.2715, "step": 1880 }, { "epoch": 0.18306569343065693, "grad_norm": 1.4137887426273796, "learning_rate": 9.39855268547107e-06, "loss": 0.3049, "step": 1881 }, { "epoch": 0.18316301703163018, "grad_norm": 1.3869164554267486, "learning_rate": 9.397802989999888e-06, "loss": 0.3526, "step": 1882 }, { "epoch": 0.1832603406326034, "grad_norm": 1.3336674996684654, "learning_rate": 9.39705285751632e-06, "loss": 0.3914, "step": 1883 }, { "epoch": 0.18335766423357663, "grad_norm": 1.2095628873380657, "learning_rate": 9.396302288094907e-06, "loss": 0.3577, "step": 1884 }, { "epoch": 0.1834549878345499, "grad_norm": 1.4741118747641506, "learning_rate": 9.395551281810233e-06, "loss": 0.4753, "step": 1885 }, { "epoch": 0.1835523114355231, "grad_norm": 1.5440799623052803, "learning_rate": 9.394799838736928e-06, "loss": 0.5143, "step": 1886 }, { "epoch": 0.18364963503649634, "grad_norm": 1.6461828641301555, "learning_rate": 9.394047958949661e-06, "loss": 0.5046, "step": 1887 }, { "epoch": 0.1837469586374696, "grad_norm": 1.3077272649446732, "learning_rate": 9.393295642523147e-06, "loss": 0.4505, "step": 1888 }, { "epoch": 0.18384428223844282, "grad_norm": 1.3954964938282017, "learning_rate": 9.392542889532146e-06, "loss": 0.3752, "step": 1889 }, { "epoch": 0.18394160583941604, "grad_norm": 1.4332674159188397, "learning_rate": 9.391789700051457e-06, "loss": 0.4102, "step": 1890 }, { "epoch": 0.1840389294403893, "grad_norm": 1.5291760471205262, "learning_rate": 9.391036074155926e-06, "loss": 0.3892, "step": 1891 }, { "epoch": 0.18413625304136252, "grad_norm": 1.3194046059109847, "learning_rate": 9.390282011920442e-06, "loss": 0.3402, "step": 1892 }, { "epoch": 0.18423357664233578, "grad_norm": 1.1218553674196712, "learning_rate": 9.389527513419935e-06, "loss": 0.2705, "step": 1893 }, { "epoch": 0.184330900243309, "grad_norm": 1.4415924034763155, "learning_rate": 9.388772578729382e-06, "loss": 0.4153, "step": 1894 }, { "epoch": 0.18442822384428223, "grad_norm": 1.1449469634853555, "learning_rate": 9.3880172079238e-06, "loss": 0.2464, "step": 1895 }, { "epoch": 0.18452554744525548, "grad_norm": 1.3609647553229742, "learning_rate": 9.38726140107825e-06, "loss": 0.4167, "step": 1896 }, { "epoch": 0.1846228710462287, "grad_norm": 1.5005607351629322, "learning_rate": 9.38650515826784e-06, "loss": 0.5496, "step": 1897 }, { "epoch": 0.18472019464720194, "grad_norm": 1.2988771816540412, "learning_rate": 9.385748479567715e-06, "loss": 0.3746, "step": 1898 }, { "epoch": 0.1848175182481752, "grad_norm": 1.6297457427665438, "learning_rate": 9.384991365053066e-06, "loss": 0.5329, "step": 1899 }, { "epoch": 0.18491484184914841, "grad_norm": 1.4260746902123356, "learning_rate": 9.384233814799133e-06, "loss": 0.5495, "step": 1900 }, { "epoch": 0.18501216545012167, "grad_norm": 1.6131616876000299, "learning_rate": 9.38347582888119e-06, "loss": 0.4956, "step": 1901 }, { "epoch": 0.1851094890510949, "grad_norm": 1.2427047036633028, "learning_rate": 9.382717407374559e-06, "loss": 0.3527, "step": 1902 }, { "epoch": 0.18520681265206812, "grad_norm": 1.1650358905093554, "learning_rate": 9.381958550354607e-06, "loss": 0.3282, "step": 1903 }, { "epoch": 0.18530413625304137, "grad_norm": 1.2422827918011654, "learning_rate": 9.381199257896738e-06, "loss": 0.3954, "step": 1904 }, { "epoch": 0.1854014598540146, "grad_norm": 1.3772059864511268, "learning_rate": 9.38043953007641e-06, "loss": 0.2519, "step": 1905 }, { "epoch": 0.18549878345498783, "grad_norm": 1.2627132972091453, "learning_rate": 9.379679366969108e-06, "loss": 0.3748, "step": 1906 }, { "epoch": 0.18559610705596108, "grad_norm": 1.7742544300786764, "learning_rate": 9.378918768650379e-06, "loss": 0.4627, "step": 1907 }, { "epoch": 0.1856934306569343, "grad_norm": 1.3460661864821146, "learning_rate": 9.3781577351958e-06, "loss": 0.3769, "step": 1908 }, { "epoch": 0.18579075425790753, "grad_norm": 1.2948363493096455, "learning_rate": 9.377396266680993e-06, "loss": 0.255, "step": 1909 }, { "epoch": 0.18588807785888078, "grad_norm": 1.4260435934265066, "learning_rate": 9.376634363181631e-06, "loss": 0.4158, "step": 1910 }, { "epoch": 0.185985401459854, "grad_norm": 1.4136193355548345, "learning_rate": 9.375872024773423e-06, "loss": 0.3764, "step": 1911 }, { "epoch": 0.18608272506082726, "grad_norm": 1.2338333390059972, "learning_rate": 9.375109251532121e-06, "loss": 0.3785, "step": 1912 }, { "epoch": 0.1861800486618005, "grad_norm": 1.535249430616727, "learning_rate": 9.374346043533524e-06, "loss": 0.5252, "step": 1913 }, { "epoch": 0.18627737226277372, "grad_norm": 1.215284604855692, "learning_rate": 9.373582400853472e-06, "loss": 0.3295, "step": 1914 }, { "epoch": 0.18637469586374697, "grad_norm": 1.331605367733698, "learning_rate": 9.372818323567847e-06, "loss": 0.2818, "step": 1915 }, { "epoch": 0.1864720194647202, "grad_norm": 1.3700650260278666, "learning_rate": 9.37205381175258e-06, "loss": 0.5125, "step": 1916 }, { "epoch": 0.18656934306569342, "grad_norm": 1.0730618437287824, "learning_rate": 9.371288865483637e-06, "loss": 0.3608, "step": 1917 }, { "epoch": 0.18666666666666668, "grad_norm": 1.6775147335354874, "learning_rate": 9.370523484837033e-06, "loss": 0.4555, "step": 1918 }, { "epoch": 0.1867639902676399, "grad_norm": 1.531630799569193, "learning_rate": 9.369757669888822e-06, "loss": 0.502, "step": 1919 }, { "epoch": 0.18686131386861313, "grad_norm": 0.924734272033398, "learning_rate": 9.368991420715109e-06, "loss": 0.2117, "step": 1920 }, { "epoch": 0.18695863746958638, "grad_norm": 1.3568146369682141, "learning_rate": 9.36822473739203e-06, "loss": 0.4311, "step": 1921 }, { "epoch": 0.1870559610705596, "grad_norm": 1.2577909858711795, "learning_rate": 9.367457619995776e-06, "loss": 0.405, "step": 1922 }, { "epoch": 0.18715328467153286, "grad_norm": 1.5933524739274278, "learning_rate": 9.366690068602573e-06, "loss": 0.627, "step": 1923 }, { "epoch": 0.18725060827250609, "grad_norm": 1.279419778059805, "learning_rate": 9.365922083288694e-06, "loss": 0.2814, "step": 1924 }, { "epoch": 0.1873479318734793, "grad_norm": 1.6336124778487715, "learning_rate": 9.365153664130454e-06, "loss": 0.6461, "step": 1925 }, { "epoch": 0.18744525547445257, "grad_norm": 5.906434394339674, "learning_rate": 9.364384811204212e-06, "loss": 0.5628, "step": 1926 }, { "epoch": 0.1875425790754258, "grad_norm": 1.2770793302804129, "learning_rate": 9.363615524586368e-06, "loss": 0.303, "step": 1927 }, { "epoch": 0.18763990267639902, "grad_norm": 1.2695156624644028, "learning_rate": 9.362845804353367e-06, "loss": 0.3592, "step": 1928 }, { "epoch": 0.18773722627737227, "grad_norm": 1.4443375056776053, "learning_rate": 9.362075650581698e-06, "loss": 0.4701, "step": 1929 }, { "epoch": 0.1878345498783455, "grad_norm": 1.4330727776563095, "learning_rate": 9.36130506334789e-06, "loss": 0.5163, "step": 1930 }, { "epoch": 0.18793187347931872, "grad_norm": 1.326934280688427, "learning_rate": 9.360534042728517e-06, "loss": 0.289, "step": 1931 }, { "epoch": 0.18802919708029198, "grad_norm": 1.0531370847104877, "learning_rate": 9.359762588800195e-06, "loss": 0.1994, "step": 1932 }, { "epoch": 0.1881265206812652, "grad_norm": 1.4998435892573359, "learning_rate": 9.358990701639585e-06, "loss": 0.4064, "step": 1933 }, { "epoch": 0.18822384428223846, "grad_norm": 2.65155925581941, "learning_rate": 9.358218381323391e-06, "loss": 0.3513, "step": 1934 }, { "epoch": 0.18832116788321168, "grad_norm": 1.280523326704506, "learning_rate": 9.357445627928356e-06, "loss": 0.3132, "step": 1935 }, { "epoch": 0.1884184914841849, "grad_norm": 1.347047087613105, "learning_rate": 9.356672441531273e-06, "loss": 0.3334, "step": 1936 }, { "epoch": 0.18851581508515816, "grad_norm": 1.2987558904079175, "learning_rate": 9.35589882220897e-06, "loss": 0.3224, "step": 1937 }, { "epoch": 0.1886131386861314, "grad_norm": 0.9974048438134153, "learning_rate": 9.355124770038323e-06, "loss": 0.2764, "step": 1938 }, { "epoch": 0.1887104622871046, "grad_norm": 2.544180913694316, "learning_rate": 9.354350285096255e-06, "loss": 0.495, "step": 1939 }, { "epoch": 0.18880778588807787, "grad_norm": 1.613510595834776, "learning_rate": 9.353575367459718e-06, "loss": 0.5269, "step": 1940 }, { "epoch": 0.1889051094890511, "grad_norm": 1.1663508101189002, "learning_rate": 9.352800017205724e-06, "loss": 0.3936, "step": 1941 }, { "epoch": 0.18900243309002432, "grad_norm": 1.3673811421181858, "learning_rate": 9.352024234411315e-06, "loss": 0.4448, "step": 1942 }, { "epoch": 0.18909975669099757, "grad_norm": 1.1481373712644614, "learning_rate": 9.351248019153582e-06, "loss": 0.3226, "step": 1943 }, { "epoch": 0.1891970802919708, "grad_norm": 1.025014870233366, "learning_rate": 9.350471371509659e-06, "loss": 0.2095, "step": 1944 }, { "epoch": 0.18929440389294405, "grad_norm": 1.6587902238420225, "learning_rate": 9.349694291556723e-06, "loss": 0.3805, "step": 1945 }, { "epoch": 0.18939172749391728, "grad_norm": 1.568770301353131, "learning_rate": 9.348916779371993e-06, "loss": 0.3902, "step": 1946 }, { "epoch": 0.1894890510948905, "grad_norm": 1.4274566422005779, "learning_rate": 9.348138835032727e-06, "loss": 0.3644, "step": 1947 }, { "epoch": 0.18958637469586376, "grad_norm": 1.6590398647584288, "learning_rate": 9.347360458616233e-06, "loss": 0.3522, "step": 1948 }, { "epoch": 0.18968369829683698, "grad_norm": 1.5905934658559544, "learning_rate": 9.346581650199859e-06, "loss": 0.3784, "step": 1949 }, { "epoch": 0.1897810218978102, "grad_norm": 1.358850838464726, "learning_rate": 9.345802409860995e-06, "loss": 0.3407, "step": 1950 }, { "epoch": 0.18987834549878346, "grad_norm": 1.5906740312195304, "learning_rate": 9.345022737677073e-06, "loss": 0.4735, "step": 1951 }, { "epoch": 0.1899756690997567, "grad_norm": 1.419279223309371, "learning_rate": 9.344242633725573e-06, "loss": 0.4677, "step": 1952 }, { "epoch": 0.19007299270072991, "grad_norm": 2.368125402390624, "learning_rate": 9.34346209808401e-06, "loss": 0.4341, "step": 1953 }, { "epoch": 0.19017031630170317, "grad_norm": 1.6018933954570558, "learning_rate": 9.342681130829949e-06, "loss": 0.4348, "step": 1954 }, { "epoch": 0.1902676399026764, "grad_norm": 1.4757982324740848, "learning_rate": 9.341899732040996e-06, "loss": 0.393, "step": 1955 }, { "epoch": 0.19036496350364965, "grad_norm": 1.463093762624457, "learning_rate": 9.341117901794797e-06, "loss": 0.3787, "step": 1956 }, { "epoch": 0.19046228710462287, "grad_norm": 1.5507561900230402, "learning_rate": 9.340335640169045e-06, "loss": 0.4715, "step": 1957 }, { "epoch": 0.1905596107055961, "grad_norm": 1.4207468273121375, "learning_rate": 9.339552947241471e-06, "loss": 0.3938, "step": 1958 }, { "epoch": 0.19065693430656935, "grad_norm": 1.407596113402629, "learning_rate": 9.338769823089853e-06, "loss": 0.4965, "step": 1959 }, { "epoch": 0.19075425790754258, "grad_norm": 1.5505869092648736, "learning_rate": 9.337986267792014e-06, "loss": 0.3699, "step": 1960 }, { "epoch": 0.1908515815085158, "grad_norm": 1.4558635051434323, "learning_rate": 9.33720228142581e-06, "loss": 0.3436, "step": 1961 }, { "epoch": 0.19094890510948906, "grad_norm": 1.4210127007858437, "learning_rate": 9.336417864069152e-06, "loss": 0.3959, "step": 1962 }, { "epoch": 0.19104622871046228, "grad_norm": 1.5797691467496429, "learning_rate": 9.335633015799983e-06, "loss": 0.5438, "step": 1963 }, { "epoch": 0.1911435523114355, "grad_norm": 1.200940613853037, "learning_rate": 9.334847736696297e-06, "loss": 0.3037, "step": 1964 }, { "epoch": 0.19124087591240876, "grad_norm": 1.6206966051553, "learning_rate": 9.334062026836128e-06, "loss": 0.6412, "step": 1965 }, { "epoch": 0.191338199513382, "grad_norm": 1.3678147539203456, "learning_rate": 9.33327588629755e-06, "loss": 0.328, "step": 1966 }, { "epoch": 0.19143552311435524, "grad_norm": 1.425436568728509, "learning_rate": 9.332489315158685e-06, "loss": 0.42, "step": 1967 }, { "epoch": 0.19153284671532847, "grad_norm": 1.4740185495034979, "learning_rate": 9.331702313497693e-06, "loss": 0.3563, "step": 1968 }, { "epoch": 0.1916301703163017, "grad_norm": 1.4865130636524604, "learning_rate": 9.33091488139278e-06, "loss": 0.3452, "step": 1969 }, { "epoch": 0.19172749391727495, "grad_norm": 1.595704917953399, "learning_rate": 9.330127018922195e-06, "loss": 0.6593, "step": 1970 }, { "epoch": 0.19182481751824818, "grad_norm": 1.4305855687191487, "learning_rate": 9.329338726164225e-06, "loss": 0.4935, "step": 1971 }, { "epoch": 0.1919221411192214, "grad_norm": 1.4810316480182457, "learning_rate": 9.328550003197203e-06, "loss": 0.4303, "step": 1972 }, { "epoch": 0.19201946472019465, "grad_norm": 1.1937939840472271, "learning_rate": 9.32776085009951e-06, "loss": 0.3178, "step": 1973 }, { "epoch": 0.19211678832116788, "grad_norm": 1.3344201288029265, "learning_rate": 9.326971266949558e-06, "loss": 0.3469, "step": 1974 }, { "epoch": 0.1922141119221411, "grad_norm": 1.5818137690503504, "learning_rate": 9.326181253825813e-06, "loss": 0.505, "step": 1975 }, { "epoch": 0.19231143552311436, "grad_norm": 1.263126969220317, "learning_rate": 9.325390810806778e-06, "loss": 0.3967, "step": 1976 }, { "epoch": 0.19240875912408759, "grad_norm": 1.6967730581105949, "learning_rate": 9.324599937971e-06, "loss": 0.7353, "step": 1977 }, { "epoch": 0.19250608272506084, "grad_norm": 1.4550804189369502, "learning_rate": 9.323808635397067e-06, "loss": 0.3326, "step": 1978 }, { "epoch": 0.19260340632603407, "grad_norm": 1.594493767215082, "learning_rate": 9.323016903163612e-06, "loss": 0.4547, "step": 1979 }, { "epoch": 0.1927007299270073, "grad_norm": 1.4855552398261571, "learning_rate": 9.322224741349313e-06, "loss": 0.5095, "step": 1980 }, { "epoch": 0.19279805352798055, "grad_norm": 1.3769945503658922, "learning_rate": 9.321432150032884e-06, "loss": 0.3853, "step": 1981 }, { "epoch": 0.19289537712895377, "grad_norm": 1.3138128708042736, "learning_rate": 9.320639129293083e-06, "loss": 0.4129, "step": 1982 }, { "epoch": 0.192992700729927, "grad_norm": 1.4617598559962484, "learning_rate": 9.319845679208719e-06, "loss": 0.449, "step": 1983 }, { "epoch": 0.19309002433090025, "grad_norm": 1.6332060417216765, "learning_rate": 9.319051799858633e-06, "loss": 0.594, "step": 1984 }, { "epoch": 0.19318734793187348, "grad_norm": 1.5432637765560855, "learning_rate": 9.318257491321714e-06, "loss": 0.3465, "step": 1985 }, { "epoch": 0.1932846715328467, "grad_norm": 1.4536395238750577, "learning_rate": 9.317462753676895e-06, "loss": 0.4212, "step": 1986 }, { "epoch": 0.19338199513381996, "grad_norm": 1.3985266204226148, "learning_rate": 9.31666758700315e-06, "loss": 0.5313, "step": 1987 }, { "epoch": 0.19347931873479318, "grad_norm": 1.4329939166816383, "learning_rate": 9.315871991379493e-06, "loss": 0.3958, "step": 1988 }, { "epoch": 0.19357664233576644, "grad_norm": 1.3666417803863316, "learning_rate": 9.315075966884984e-06, "loss": 0.462, "step": 1989 }, { "epoch": 0.19367396593673966, "grad_norm": 1.6059064802064114, "learning_rate": 9.314279513598721e-06, "loss": 0.5734, "step": 1990 }, { "epoch": 0.1937712895377129, "grad_norm": 1.521730062801285, "learning_rate": 9.313482631599854e-06, "loss": 0.3479, "step": 1991 }, { "epoch": 0.19386861313868614, "grad_norm": 1.5212897395363751, "learning_rate": 9.312685320967566e-06, "loss": 0.4328, "step": 1992 }, { "epoch": 0.19396593673965937, "grad_norm": 1.669365255826549, "learning_rate": 9.311887581781086e-06, "loss": 0.6153, "step": 1993 }, { "epoch": 0.1940632603406326, "grad_norm": 1.1692329123053622, "learning_rate": 9.311089414119688e-06, "loss": 0.3149, "step": 1994 }, { "epoch": 0.19416058394160585, "grad_norm": 1.4724909439197027, "learning_rate": 9.310290818062683e-06, "loss": 0.478, "step": 1995 }, { "epoch": 0.19425790754257907, "grad_norm": 1.667688851021317, "learning_rate": 9.309491793689431e-06, "loss": 0.6192, "step": 1996 }, { "epoch": 0.1943552311435523, "grad_norm": 1.2423474670669281, "learning_rate": 9.30869234107933e-06, "loss": 0.4242, "step": 1997 }, { "epoch": 0.19445255474452555, "grad_norm": 1.4117486896728357, "learning_rate": 9.307892460311825e-06, "loss": 0.4417, "step": 1998 }, { "epoch": 0.19454987834549878, "grad_norm": 1.6605518542896853, "learning_rate": 9.307092151466397e-06, "loss": 0.5289, "step": 1999 }, { "epoch": 0.19464720194647203, "grad_norm": 1.661933360658536, "learning_rate": 9.306291414622575e-06, "loss": 0.3357, "step": 2000 }, { "epoch": 0.19474452554744526, "grad_norm": 1.4409618985011814, "learning_rate": 9.305490249859927e-06, "loss": 0.4563, "step": 2001 }, { "epoch": 0.19484184914841848, "grad_norm": 1.9082899591217046, "learning_rate": 9.304688657258068e-06, "loss": 0.3445, "step": 2002 }, { "epoch": 0.19493917274939174, "grad_norm": 1.2157434891172034, "learning_rate": 9.303886636896649e-06, "loss": 0.3719, "step": 2003 }, { "epoch": 0.19503649635036496, "grad_norm": 1.57236888854409, "learning_rate": 9.303084188855371e-06, "loss": 0.4399, "step": 2004 }, { "epoch": 0.1951338199513382, "grad_norm": 1.4041570559360463, "learning_rate": 9.302281313213973e-06, "loss": 0.4442, "step": 2005 }, { "epoch": 0.19523114355231144, "grad_norm": 1.595081147428658, "learning_rate": 9.301478010052237e-06, "loss": 0.4225, "step": 2006 }, { "epoch": 0.19532846715328467, "grad_norm": 1.562924823229517, "learning_rate": 9.300674279449986e-06, "loss": 0.3739, "step": 2007 }, { "epoch": 0.1954257907542579, "grad_norm": 1.6925679153497177, "learning_rate": 9.299870121487088e-06, "loss": 0.4465, "step": 2008 }, { "epoch": 0.19552311435523115, "grad_norm": 1.4955175500348226, "learning_rate": 9.299065536243453e-06, "loss": 0.5055, "step": 2009 }, { "epoch": 0.19562043795620437, "grad_norm": 1.5602814755448668, "learning_rate": 9.298260523799035e-06, "loss": 0.4214, "step": 2010 }, { "epoch": 0.19571776155717763, "grad_norm": 1.4678189187481074, "learning_rate": 9.297455084233826e-06, "loss": 0.4221, "step": 2011 }, { "epoch": 0.19581508515815085, "grad_norm": 1.1014848505883976, "learning_rate": 9.296649217627863e-06, "loss": 0.2531, "step": 2012 }, { "epoch": 0.19591240875912408, "grad_norm": 1.553421501855423, "learning_rate": 9.295842924061227e-06, "loss": 0.5409, "step": 2013 }, { "epoch": 0.19600973236009733, "grad_norm": 1.598118050761176, "learning_rate": 9.295036203614039e-06, "loss": 0.4084, "step": 2014 }, { "epoch": 0.19610705596107056, "grad_norm": 1.6278848716274248, "learning_rate": 9.294229056366464e-06, "loss": 0.5842, "step": 2015 }, { "epoch": 0.19620437956204378, "grad_norm": 1.243515264701947, "learning_rate": 9.293421482398708e-06, "loss": 0.3504, "step": 2016 }, { "epoch": 0.19630170316301704, "grad_norm": 1.4687425329140307, "learning_rate": 9.29261348179102e-06, "loss": 0.2732, "step": 2017 }, { "epoch": 0.19639902676399026, "grad_norm": 1.8000259635960119, "learning_rate": 9.291805054623691e-06, "loss": 0.7865, "step": 2018 }, { "epoch": 0.1964963503649635, "grad_norm": 1.5721673591186547, "learning_rate": 9.290996200977058e-06, "loss": 0.5686, "step": 2019 }, { "epoch": 0.19659367396593674, "grad_norm": 1.4634877349944297, "learning_rate": 9.290186920931493e-06, "loss": 0.4884, "step": 2020 }, { "epoch": 0.19669099756690997, "grad_norm": 1.8795352763168436, "learning_rate": 9.289377214567418e-06, "loss": 0.279, "step": 2021 }, { "epoch": 0.19678832116788322, "grad_norm": 1.2525962570268505, "learning_rate": 9.288567081965292e-06, "loss": 0.3003, "step": 2022 }, { "epoch": 0.19688564476885645, "grad_norm": 1.4414518188882164, "learning_rate": 9.28775652320562e-06, "loss": 0.2883, "step": 2023 }, { "epoch": 0.19698296836982968, "grad_norm": 1.1469869990322892, "learning_rate": 9.286945538368946e-06, "loss": 0.301, "step": 2024 }, { "epoch": 0.19708029197080293, "grad_norm": 1.4386800814955665, "learning_rate": 9.286134127535859e-06, "loss": 0.417, "step": 2025 }, { "epoch": 0.19717761557177615, "grad_norm": 1.4334168701816348, "learning_rate": 9.28532229078699e-06, "loss": 0.4694, "step": 2026 }, { "epoch": 0.19727493917274938, "grad_norm": 1.2925159318336792, "learning_rate": 9.28451002820301e-06, "loss": 0.4438, "step": 2027 }, { "epoch": 0.19737226277372263, "grad_norm": 1.1608723700468837, "learning_rate": 9.283697339864635e-06, "loss": 0.3899, "step": 2028 }, { "epoch": 0.19746958637469586, "grad_norm": 1.0831308664734243, "learning_rate": 9.282884225852625e-06, "loss": 0.3594, "step": 2029 }, { "epoch": 0.19756690997566909, "grad_norm": 1.3854325468066278, "learning_rate": 9.282070686247773e-06, "loss": 0.5111, "step": 2030 }, { "epoch": 0.19766423357664234, "grad_norm": 1.2843702051671877, "learning_rate": 9.281256721130927e-06, "loss": 0.3298, "step": 2031 }, { "epoch": 0.19776155717761557, "grad_norm": 1.4725158786403292, "learning_rate": 9.280442330582968e-06, "loss": 0.4776, "step": 2032 }, { "epoch": 0.19785888077858882, "grad_norm": 1.2748346913452204, "learning_rate": 9.279627514684826e-06, "loss": 0.4438, "step": 2033 }, { "epoch": 0.19795620437956205, "grad_norm": 1.406716290626126, "learning_rate": 9.278812273517465e-06, "loss": 0.2814, "step": 2034 }, { "epoch": 0.19805352798053527, "grad_norm": 1.3303438388967537, "learning_rate": 9.2779966071619e-06, "loss": 0.4314, "step": 2035 }, { "epoch": 0.19815085158150852, "grad_norm": 1.4134730169408085, "learning_rate": 9.277180515699183e-06, "loss": 0.2764, "step": 2036 }, { "epoch": 0.19824817518248175, "grad_norm": 1.3255645305073551, "learning_rate": 9.276363999210407e-06, "loss": 0.4347, "step": 2037 }, { "epoch": 0.19834549878345498, "grad_norm": 1.4369644328356708, "learning_rate": 9.275547057776713e-06, "loss": 0.3551, "step": 2038 }, { "epoch": 0.19844282238442823, "grad_norm": 1.748281657046459, "learning_rate": 9.27472969147928e-06, "loss": 0.4372, "step": 2039 }, { "epoch": 0.19854014598540146, "grad_norm": 1.2795189118800725, "learning_rate": 9.273911900399331e-06, "loss": 0.4431, "step": 2040 }, { "epoch": 0.1986374695863747, "grad_norm": 1.165526474375854, "learning_rate": 9.273093684618129e-06, "loss": 0.2936, "step": 2041 }, { "epoch": 0.19873479318734794, "grad_norm": 1.6068781771010836, "learning_rate": 9.272275044216981e-06, "loss": 0.5125, "step": 2042 }, { "epoch": 0.19883211678832116, "grad_norm": 1.4210491087425543, "learning_rate": 9.271455979277234e-06, "loss": 0.4142, "step": 2043 }, { "epoch": 0.19892944038929442, "grad_norm": 1.6609287753373938, "learning_rate": 9.270636489880283e-06, "loss": 0.6728, "step": 2044 }, { "epoch": 0.19902676399026764, "grad_norm": 1.3902108507987736, "learning_rate": 9.26981657610756e-06, "loss": 0.3492, "step": 2045 }, { "epoch": 0.19912408759124087, "grad_norm": 1.6316422644879316, "learning_rate": 9.268996238040537e-06, "loss": 0.5029, "step": 2046 }, { "epoch": 0.19922141119221412, "grad_norm": 1.2841836791466006, "learning_rate": 9.268175475760734e-06, "loss": 0.3849, "step": 2047 }, { "epoch": 0.19931873479318735, "grad_norm": 1.319713524379575, "learning_rate": 9.267354289349712e-06, "loss": 0.4439, "step": 2048 }, { "epoch": 0.19941605839416057, "grad_norm": 1.3549935774985267, "learning_rate": 9.266532678889071e-06, "loss": 0.4382, "step": 2049 }, { "epoch": 0.19951338199513383, "grad_norm": 1.8518976479625036, "learning_rate": 9.265710644460455e-06, "loss": 0.8216, "step": 2050 }, { "epoch": 0.19961070559610705, "grad_norm": 1.9509154982810264, "learning_rate": 9.26488818614555e-06, "loss": 0.4607, "step": 2051 }, { "epoch": 0.1997080291970803, "grad_norm": 1.2954164138913125, "learning_rate": 9.264065304026087e-06, "loss": 0.4257, "step": 2052 }, { "epoch": 0.19980535279805353, "grad_norm": 1.925685176039115, "learning_rate": 9.26324199818383e-06, "loss": 0.6025, "step": 2053 }, { "epoch": 0.19990267639902676, "grad_norm": 1.533947029174009, "learning_rate": 9.262418268700596e-06, "loss": 0.5443, "step": 2054 }, { "epoch": 0.2, "grad_norm": 1.4995274594175463, "learning_rate": 9.26159411565824e-06, "loss": 0.5023, "step": 2055 }, { "epoch": 0.20009732360097324, "grad_norm": 1.4350182215101954, "learning_rate": 9.26076953913866e-06, "loss": 0.3726, "step": 2056 }, { "epoch": 0.20019464720194646, "grad_norm": 1.3019491914952392, "learning_rate": 9.259944539223788e-06, "loss": 0.4765, "step": 2057 }, { "epoch": 0.20029197080291972, "grad_norm": 1.3884509805578256, "learning_rate": 9.25911911599561e-06, "loss": 0.338, "step": 2058 }, { "epoch": 0.20038929440389294, "grad_norm": 1.488048064619486, "learning_rate": 9.258293269536146e-06, "loss": 0.5872, "step": 2059 }, { "epoch": 0.20048661800486617, "grad_norm": 1.1548733119099643, "learning_rate": 9.257466999927464e-06, "loss": 0.3242, "step": 2060 }, { "epoch": 0.20058394160583942, "grad_norm": 1.048222542797774, "learning_rate": 9.25664030725167e-06, "loss": 0.3253, "step": 2061 }, { "epoch": 0.20068126520681265, "grad_norm": 1.211590892113714, "learning_rate": 9.255813191590912e-06, "loss": 0.3414, "step": 2062 }, { "epoch": 0.2007785888077859, "grad_norm": 1.3770802107798175, "learning_rate": 9.254985653027382e-06, "loss": 0.4031, "step": 2063 }, { "epoch": 0.20087591240875913, "grad_norm": 1.4503315973945832, "learning_rate": 9.25415769164331e-06, "loss": 0.4799, "step": 2064 }, { "epoch": 0.20097323600973235, "grad_norm": 1.3613570222565128, "learning_rate": 9.253329307520976e-06, "loss": 0.3932, "step": 2065 }, { "epoch": 0.2010705596107056, "grad_norm": 1.436956883536887, "learning_rate": 9.252500500742692e-06, "loss": 0.51, "step": 2066 }, { "epoch": 0.20116788321167883, "grad_norm": 1.3042874208229347, "learning_rate": 9.25167127139082e-06, "loss": 0.3702, "step": 2067 }, { "epoch": 0.20126520681265206, "grad_norm": 1.4601934649693376, "learning_rate": 9.250841619547762e-06, "loss": 0.3927, "step": 2068 }, { "epoch": 0.2013625304136253, "grad_norm": 1.4877017036692342, "learning_rate": 9.250011545295959e-06, "loss": 0.5463, "step": 2069 }, { "epoch": 0.20145985401459854, "grad_norm": 1.3385891837902342, "learning_rate": 9.249181048717895e-06, "loss": 0.3052, "step": 2070 }, { "epoch": 0.20155717761557176, "grad_norm": 1.111892744483471, "learning_rate": 9.2483501298961e-06, "loss": 0.2342, "step": 2071 }, { "epoch": 0.20165450121654502, "grad_norm": 1.4336755713622584, "learning_rate": 9.247518788913141e-06, "loss": 0.4416, "step": 2072 }, { "epoch": 0.20175182481751824, "grad_norm": 1.4682039909825075, "learning_rate": 9.246687025851629e-06, "loss": 0.3044, "step": 2073 }, { "epoch": 0.2018491484184915, "grad_norm": 1.1356161216510552, "learning_rate": 9.245854840794217e-06, "loss": 0.2913, "step": 2074 }, { "epoch": 0.20194647201946472, "grad_norm": 1.2497989015941582, "learning_rate": 9.2450222338236e-06, "loss": 0.356, "step": 2075 }, { "epoch": 0.20204379562043795, "grad_norm": 1.4662802201560914, "learning_rate": 9.244189205022514e-06, "loss": 0.5234, "step": 2076 }, { "epoch": 0.2021411192214112, "grad_norm": 1.1493994388606168, "learning_rate": 9.243355754473738e-06, "loss": 0.3862, "step": 2077 }, { "epoch": 0.20223844282238443, "grad_norm": 1.1352456631925198, "learning_rate": 9.242521882260093e-06, "loss": 0.3693, "step": 2078 }, { "epoch": 0.20233576642335765, "grad_norm": 1.4112847797443164, "learning_rate": 9.24168758846444e-06, "loss": 0.4667, "step": 2079 }, { "epoch": 0.2024330900243309, "grad_norm": 1.9587086933310962, "learning_rate": 9.240852873169686e-06, "loss": 0.5446, "step": 2080 }, { "epoch": 0.20253041362530413, "grad_norm": 1.4532595336328356, "learning_rate": 9.240017736458772e-06, "loss": 0.56, "step": 2081 }, { "epoch": 0.20262773722627736, "grad_norm": 1.1373158358211433, "learning_rate": 9.239182178414694e-06, "loss": 0.3998, "step": 2082 }, { "epoch": 0.2027250608272506, "grad_norm": 1.4892855081953407, "learning_rate": 9.238346199120473e-06, "loss": 0.5564, "step": 2083 }, { "epoch": 0.20282238442822384, "grad_norm": 1.4122351541601532, "learning_rate": 9.237509798659188e-06, "loss": 0.4407, "step": 2084 }, { "epoch": 0.2029197080291971, "grad_norm": 1.266747153803517, "learning_rate": 9.236672977113948e-06, "loss": 0.3898, "step": 2085 }, { "epoch": 0.20301703163017032, "grad_norm": 1.3972737248894866, "learning_rate": 9.23583573456791e-06, "loss": 0.4855, "step": 2086 }, { "epoch": 0.20311435523114355, "grad_norm": 1.6424190339871019, "learning_rate": 9.234998071104272e-06, "loss": 0.732, "step": 2087 }, { "epoch": 0.2032116788321168, "grad_norm": 1.4973722328869334, "learning_rate": 9.234159986806275e-06, "loss": 0.4796, "step": 2088 }, { "epoch": 0.20330900243309002, "grad_norm": 1.5629802728678386, "learning_rate": 9.233321481757196e-06, "loss": 0.4762, "step": 2089 }, { "epoch": 0.20340632603406325, "grad_norm": 1.5273353205689704, "learning_rate": 9.23248255604036e-06, "loss": 0.6446, "step": 2090 }, { "epoch": 0.2035036496350365, "grad_norm": 1.3835329237350877, "learning_rate": 9.231643209739128e-06, "loss": 0.5297, "step": 2091 }, { "epoch": 0.20360097323600973, "grad_norm": 1.2187102873763251, "learning_rate": 9.230803442936911e-06, "loss": 0.3727, "step": 2092 }, { "epoch": 0.20369829683698296, "grad_norm": 1.325749011032711, "learning_rate": 9.229963255717156e-06, "loss": 0.5476, "step": 2093 }, { "epoch": 0.2037956204379562, "grad_norm": 1.1246093495598513, "learning_rate": 9.229122648163351e-06, "loss": 0.3309, "step": 2094 }, { "epoch": 0.20389294403892944, "grad_norm": 1.3415111254139396, "learning_rate": 9.22828162035903e-06, "loss": 0.4226, "step": 2095 }, { "epoch": 0.2039902676399027, "grad_norm": 1.2431047519820402, "learning_rate": 9.227440172387766e-06, "loss": 0.2364, "step": 2096 }, { "epoch": 0.20408759124087592, "grad_norm": 1.59824202042343, "learning_rate": 9.226598304333175e-06, "loss": 0.5713, "step": 2097 }, { "epoch": 0.20418491484184914, "grad_norm": 1.3718145057357327, "learning_rate": 9.22575601627891e-06, "loss": 0.4366, "step": 2098 }, { "epoch": 0.2042822384428224, "grad_norm": 1.8310954422547832, "learning_rate": 9.224913308308672e-06, "loss": 0.4098, "step": 2099 }, { "epoch": 0.20437956204379562, "grad_norm": 1.3433956299970118, "learning_rate": 9.224070180506202e-06, "loss": 0.2959, "step": 2100 }, { "epoch": 0.20447688564476885, "grad_norm": 1.0277615122037833, "learning_rate": 9.223226632955283e-06, "loss": 0.265, "step": 2101 }, { "epoch": 0.2045742092457421, "grad_norm": 1.2285380399323877, "learning_rate": 9.222382665739737e-06, "loss": 0.3844, "step": 2102 }, { "epoch": 0.20467153284671533, "grad_norm": 1.1151094116106592, "learning_rate": 9.221538278943432e-06, "loss": 0.2461, "step": 2103 }, { "epoch": 0.20476885644768855, "grad_norm": 1.5239102143699876, "learning_rate": 9.22069347265027e-06, "loss": 0.4239, "step": 2104 }, { "epoch": 0.2048661800486618, "grad_norm": 1.6502658051911525, "learning_rate": 9.219848246944206e-06, "loss": 0.6723, "step": 2105 }, { "epoch": 0.20496350364963503, "grad_norm": 1.638974040274465, "learning_rate": 9.219002601909229e-06, "loss": 0.5068, "step": 2106 }, { "epoch": 0.20506082725060829, "grad_norm": 1.4649352184984061, "learning_rate": 9.218156537629368e-06, "loss": 0.4698, "step": 2107 }, { "epoch": 0.2051581508515815, "grad_norm": 1.5070786345583258, "learning_rate": 9.217310054188699e-06, "loss": 0.4654, "step": 2108 }, { "epoch": 0.20525547445255474, "grad_norm": 1.2480947756940115, "learning_rate": 9.216463151671338e-06, "loss": 0.3614, "step": 2109 }, { "epoch": 0.205352798053528, "grad_norm": 1.6536121595263205, "learning_rate": 9.215615830161443e-06, "loss": 0.5872, "step": 2110 }, { "epoch": 0.20545012165450122, "grad_norm": 1.5559546132859907, "learning_rate": 9.214768089743211e-06, "loss": 0.5098, "step": 2111 }, { "epoch": 0.20554744525547444, "grad_norm": 1.5691593927804695, "learning_rate": 9.213919930500884e-06, "loss": 0.3845, "step": 2112 }, { "epoch": 0.2056447688564477, "grad_norm": 1.4385010923740136, "learning_rate": 9.213071352518744e-06, "loss": 0.4035, "step": 2113 }, { "epoch": 0.20574209245742092, "grad_norm": 1.2415148755341134, "learning_rate": 9.212222355881111e-06, "loss": 0.2503, "step": 2114 }, { "epoch": 0.20583941605839415, "grad_norm": 1.597224767194554, "learning_rate": 9.211372940672356e-06, "loss": 0.3831, "step": 2115 }, { "epoch": 0.2059367396593674, "grad_norm": 1.3936071245663937, "learning_rate": 9.210523106976884e-06, "loss": 0.3664, "step": 2116 }, { "epoch": 0.20603406326034063, "grad_norm": 1.4335641468120297, "learning_rate": 9.209672854879142e-06, "loss": 0.3182, "step": 2117 }, { "epoch": 0.20613138686131388, "grad_norm": 1.2544256067640176, "learning_rate": 9.20882218446362e-06, "loss": 0.2678, "step": 2118 }, { "epoch": 0.2062287104622871, "grad_norm": 1.4867246001264303, "learning_rate": 9.207971095814852e-06, "loss": 0.4934, "step": 2119 }, { "epoch": 0.20632603406326033, "grad_norm": 1.5387304887069146, "learning_rate": 9.207119589017408e-06, "loss": 0.4552, "step": 2120 }, { "epoch": 0.2064233576642336, "grad_norm": 1.507156387441411, "learning_rate": 9.206267664155906e-06, "loss": 0.4209, "step": 2121 }, { "epoch": 0.2065206812652068, "grad_norm": 1.3407732350308024, "learning_rate": 9.205415321315e-06, "loss": 0.4256, "step": 2122 }, { "epoch": 0.20661800486618004, "grad_norm": 1.6313949345186305, "learning_rate": 9.20456256057939e-06, "loss": 0.4727, "step": 2123 }, { "epoch": 0.2067153284671533, "grad_norm": 1.695026004332969, "learning_rate": 9.203709382033814e-06, "loss": 0.6547, "step": 2124 }, { "epoch": 0.20681265206812652, "grad_norm": 1.5677721722384952, "learning_rate": 9.202855785763053e-06, "loss": 0.4469, "step": 2125 }, { "epoch": 0.20690997566909974, "grad_norm": 1.4276579746412523, "learning_rate": 9.202001771851928e-06, "loss": 0.4511, "step": 2126 }, { "epoch": 0.207007299270073, "grad_norm": 1.365652083209099, "learning_rate": 9.201147340385304e-06, "loss": 0.4435, "step": 2127 }, { "epoch": 0.20710462287104622, "grad_norm": 1.4014399599326692, "learning_rate": 9.200292491448086e-06, "loss": 0.4017, "step": 2128 }, { "epoch": 0.20720194647201948, "grad_norm": 1.4131798281318602, "learning_rate": 9.199437225125223e-06, "loss": 0.2781, "step": 2129 }, { "epoch": 0.2072992700729927, "grad_norm": 1.3392698432345278, "learning_rate": 9.198581541501702e-06, "loss": 0.3576, "step": 2130 }, { "epoch": 0.20739659367396593, "grad_norm": 1.2859171090531423, "learning_rate": 9.197725440662552e-06, "loss": 0.4505, "step": 2131 }, { "epoch": 0.20749391727493918, "grad_norm": 1.3075221898254676, "learning_rate": 9.196868922692845e-06, "loss": 0.42, "step": 2132 }, { "epoch": 0.2075912408759124, "grad_norm": 1.3120969425940014, "learning_rate": 9.196011987677693e-06, "loss": 0.3918, "step": 2133 }, { "epoch": 0.20768856447688563, "grad_norm": 1.2917866907447901, "learning_rate": 9.19515463570225e-06, "loss": 0.4515, "step": 2134 }, { "epoch": 0.2077858880778589, "grad_norm": 1.4964227937052923, "learning_rate": 9.194296866851714e-06, "loss": 0.4007, "step": 2135 }, { "epoch": 0.2078832116788321, "grad_norm": 1.4096694486456338, "learning_rate": 9.19343868121132e-06, "loss": 0.5684, "step": 2136 }, { "epoch": 0.20798053527980534, "grad_norm": 1.1303877036272907, "learning_rate": 9.192580078866346e-06, "loss": 0.2661, "step": 2137 }, { "epoch": 0.2080778588807786, "grad_norm": 1.4056619474271335, "learning_rate": 9.191721059902112e-06, "loss": 0.4174, "step": 2138 }, { "epoch": 0.20817518248175182, "grad_norm": 1.7142064467904727, "learning_rate": 9.190861624403981e-06, "loss": 0.4453, "step": 2139 }, { "epoch": 0.20827250608272507, "grad_norm": 1.3293557691236777, "learning_rate": 9.190001772457356e-06, "loss": 0.4541, "step": 2140 }, { "epoch": 0.2083698296836983, "grad_norm": 1.6131133576379075, "learning_rate": 9.189141504147676e-06, "loss": 0.3751, "step": 2141 }, { "epoch": 0.20846715328467152, "grad_norm": 1.509737357483189, "learning_rate": 9.188280819560431e-06, "loss": 0.4757, "step": 2142 }, { "epoch": 0.20856447688564478, "grad_norm": 1.479538114231473, "learning_rate": 9.187419718781149e-06, "loss": 0.3243, "step": 2143 }, { "epoch": 0.208661800486618, "grad_norm": 1.4973982658919327, "learning_rate": 9.186558201895395e-06, "loss": 0.3732, "step": 2144 }, { "epoch": 0.20875912408759123, "grad_norm": 1.5121453838943797, "learning_rate": 9.185696268988777e-06, "loss": 0.5435, "step": 2145 }, { "epoch": 0.20885644768856448, "grad_norm": 1.7349033410138828, "learning_rate": 9.18483392014695e-06, "loss": 0.6415, "step": 2146 }, { "epoch": 0.2089537712895377, "grad_norm": 1.4812330220855032, "learning_rate": 9.183971155455602e-06, "loss": 0.4961, "step": 2147 }, { "epoch": 0.20905109489051094, "grad_norm": 1.5121767597167877, "learning_rate": 9.183107975000472e-06, "loss": 0.5298, "step": 2148 }, { "epoch": 0.2091484184914842, "grad_norm": 1.5424817825799644, "learning_rate": 9.18224437886733e-06, "loss": 0.4577, "step": 2149 }, { "epoch": 0.20924574209245742, "grad_norm": 1.2733853569354763, "learning_rate": 9.181380367141991e-06, "loss": 0.3306, "step": 2150 }, { "epoch": 0.20934306569343067, "grad_norm": 1.1384650904715041, "learning_rate": 9.180515939910317e-06, "loss": 0.3831, "step": 2151 }, { "epoch": 0.2094403892944039, "grad_norm": 1.3798308474076018, "learning_rate": 9.179651097258204e-06, "loss": 0.4629, "step": 2152 }, { "epoch": 0.20953771289537712, "grad_norm": 1.4059733648531154, "learning_rate": 9.178785839271593e-06, "loss": 0.4526, "step": 2153 }, { "epoch": 0.20963503649635037, "grad_norm": 1.581039004516103, "learning_rate": 9.177920166036464e-06, "loss": 0.5397, "step": 2154 }, { "epoch": 0.2097323600973236, "grad_norm": 1.4851118969101265, "learning_rate": 9.17705407763884e-06, "loss": 0.5052, "step": 2155 }, { "epoch": 0.20982968369829683, "grad_norm": 1.3633687775503893, "learning_rate": 9.176187574164785e-06, "loss": 0.4427, "step": 2156 }, { "epoch": 0.20992700729927008, "grad_norm": 1.360319094739405, "learning_rate": 9.175320655700407e-06, "loss": 0.3649, "step": 2157 }, { "epoch": 0.2100243309002433, "grad_norm": 1.3829673206277566, "learning_rate": 9.174453322331844e-06, "loss": 0.3536, "step": 2158 }, { "epoch": 0.21012165450121653, "grad_norm": 1.5804059757696094, "learning_rate": 9.173585574145292e-06, "loss": 0.5937, "step": 2159 }, { "epoch": 0.21021897810218979, "grad_norm": 1.4991084469228289, "learning_rate": 9.172717411226975e-06, "loss": 0.3523, "step": 2160 }, { "epoch": 0.210316301703163, "grad_norm": 1.4762289487935065, "learning_rate": 9.171848833663165e-06, "loss": 0.4991, "step": 2161 }, { "epoch": 0.21041362530413626, "grad_norm": 1.4858484283610454, "learning_rate": 9.17097984154017e-06, "loss": 0.5153, "step": 2162 }, { "epoch": 0.2105109489051095, "grad_norm": 1.2647097068290445, "learning_rate": 9.170110434944345e-06, "loss": 0.3193, "step": 2163 }, { "epoch": 0.21060827250608272, "grad_norm": 1.6889738075479466, "learning_rate": 9.169240613962086e-06, "loss": 0.4755, "step": 2164 }, { "epoch": 0.21070559610705597, "grad_norm": 1.6464662019172414, "learning_rate": 9.168370378679821e-06, "loss": 0.5303, "step": 2165 }, { "epoch": 0.2108029197080292, "grad_norm": 1.287927301108519, "learning_rate": 9.16749972918403e-06, "loss": 0.3231, "step": 2166 }, { "epoch": 0.21090024330900242, "grad_norm": 1.378935902738664, "learning_rate": 9.16662866556123e-06, "loss": 0.4654, "step": 2167 }, { "epoch": 0.21099756690997568, "grad_norm": 1.415652566603492, "learning_rate": 9.16575718789798e-06, "loss": 0.42, "step": 2168 }, { "epoch": 0.2110948905109489, "grad_norm": 1.189498123796033, "learning_rate": 9.164885296280875e-06, "loss": 0.3529, "step": 2169 }, { "epoch": 0.21119221411192213, "grad_norm": 1.5371351227791108, "learning_rate": 9.16401299079656e-06, "loss": 0.4679, "step": 2170 }, { "epoch": 0.21128953771289538, "grad_norm": 1.2493790037654902, "learning_rate": 9.163140271531714e-06, "loss": 0.3793, "step": 2171 }, { "epoch": 0.2113868613138686, "grad_norm": 1.3836947713855836, "learning_rate": 9.16226713857306e-06, "loss": 0.436, "step": 2172 }, { "epoch": 0.21148418491484186, "grad_norm": 1.583280621035993, "learning_rate": 9.161393592007364e-06, "loss": 0.5673, "step": 2173 }, { "epoch": 0.2115815085158151, "grad_norm": 1.336076606512916, "learning_rate": 9.160519631921427e-06, "loss": 0.418, "step": 2174 }, { "epoch": 0.2116788321167883, "grad_norm": 1.5539773056945747, "learning_rate": 9.159645258402098e-06, "loss": 0.4417, "step": 2175 }, { "epoch": 0.21177615571776157, "grad_norm": 1.35099904216899, "learning_rate": 9.158770471536261e-06, "loss": 0.4389, "step": 2176 }, { "epoch": 0.2118734793187348, "grad_norm": 1.5960801985245197, "learning_rate": 9.157895271410848e-06, "loss": 0.4444, "step": 2177 }, { "epoch": 0.21197080291970802, "grad_norm": 1.343338393224711, "learning_rate": 9.157019658112825e-06, "loss": 0.3867, "step": 2178 }, { "epoch": 0.21206812652068127, "grad_norm": 1.573040163695098, "learning_rate": 9.156143631729205e-06, "loss": 0.5564, "step": 2179 }, { "epoch": 0.2121654501216545, "grad_norm": 1.477194998770335, "learning_rate": 9.155267192347037e-06, "loss": 0.5053, "step": 2180 }, { "epoch": 0.21226277372262772, "grad_norm": 1.4697445687746653, "learning_rate": 9.154390340053414e-06, "loss": 0.4462, "step": 2181 }, { "epoch": 0.21236009732360098, "grad_norm": 1.2383233673923462, "learning_rate": 9.15351307493547e-06, "loss": 0.4023, "step": 2182 }, { "epoch": 0.2124574209245742, "grad_norm": 1.73929160255024, "learning_rate": 9.152635397080377e-06, "loss": 0.456, "step": 2183 }, { "epoch": 0.21255474452554746, "grad_norm": 1.814215933055299, "learning_rate": 9.151757306575354e-06, "loss": 0.5283, "step": 2184 }, { "epoch": 0.21265206812652068, "grad_norm": 1.440140413882406, "learning_rate": 9.150878803507655e-06, "loss": 0.4754, "step": 2185 }, { "epoch": 0.2127493917274939, "grad_norm": 1.4991761170210094, "learning_rate": 9.149999887964577e-06, "loss": 0.4244, "step": 2186 }, { "epoch": 0.21284671532846716, "grad_norm": 1.6045542244692401, "learning_rate": 9.149120560033461e-06, "loss": 0.4149, "step": 2187 }, { "epoch": 0.2129440389294404, "grad_norm": 1.6999406355422166, "learning_rate": 9.148240819801684e-06, "loss": 0.7227, "step": 2188 }, { "epoch": 0.2130413625304136, "grad_norm": 1.5383336234101048, "learning_rate": 9.147360667356667e-06, "loss": 0.4102, "step": 2189 }, { "epoch": 0.21313868613138687, "grad_norm": 1.3100772476716567, "learning_rate": 9.146480102785871e-06, "loss": 0.4001, "step": 2190 }, { "epoch": 0.2132360097323601, "grad_norm": 1.2113504505529646, "learning_rate": 9.1455991261768e-06, "loss": 0.3906, "step": 2191 }, { "epoch": 0.21333333333333335, "grad_norm": 2.1524156395732996, "learning_rate": 9.144717737616994e-06, "loss": 0.3722, "step": 2192 }, { "epoch": 0.21343065693430657, "grad_norm": 1.3156410053212892, "learning_rate": 9.143835937194039e-06, "loss": 0.414, "step": 2193 }, { "epoch": 0.2135279805352798, "grad_norm": 1.382537469614808, "learning_rate": 9.14295372499556e-06, "loss": 0.3687, "step": 2194 }, { "epoch": 0.21362530413625305, "grad_norm": 1.4106617705657403, "learning_rate": 9.142071101109224e-06, "loss": 0.2515, "step": 2195 }, { "epoch": 0.21372262773722628, "grad_norm": 1.4292530170893925, "learning_rate": 9.141188065622736e-06, "loss": 0.4671, "step": 2196 }, { "epoch": 0.2138199513381995, "grad_norm": 1.371262803483025, "learning_rate": 9.140304618623844e-06, "loss": 0.4397, "step": 2197 }, { "epoch": 0.21391727493917276, "grad_norm": 1.3337172412513854, "learning_rate": 9.13942076020034e-06, "loss": 0.4518, "step": 2198 }, { "epoch": 0.21401459854014598, "grad_norm": 1.195478639712577, "learning_rate": 9.138536490440046e-06, "loss": 0.3236, "step": 2199 }, { "epoch": 0.2141119221411192, "grad_norm": 1.6207375008593756, "learning_rate": 9.13765180943084e-06, "loss": 0.5147, "step": 2200 }, { "epoch": 0.21420924574209246, "grad_norm": 1.457360033672521, "learning_rate": 9.136766717260631e-06, "loss": 0.3228, "step": 2201 }, { "epoch": 0.2143065693430657, "grad_norm": 1.2314544120773039, "learning_rate": 9.13588121401737e-06, "loss": 0.3413, "step": 2202 }, { "epoch": 0.21440389294403894, "grad_norm": 1.3614880154600904, "learning_rate": 9.13499529978905e-06, "loss": 0.3902, "step": 2203 }, { "epoch": 0.21450121654501217, "grad_norm": 1.3431981306372034, "learning_rate": 9.134108974663707e-06, "loss": 0.4893, "step": 2204 }, { "epoch": 0.2145985401459854, "grad_norm": 1.346114362934121, "learning_rate": 9.133222238729414e-06, "loss": 0.4195, "step": 2205 }, { "epoch": 0.21469586374695865, "grad_norm": 1.2405202461045035, "learning_rate": 9.132335092074285e-06, "loss": 0.4373, "step": 2206 }, { "epoch": 0.21479318734793187, "grad_norm": 1.2952176269832685, "learning_rate": 9.131447534786478e-06, "loss": 0.3253, "step": 2207 }, { "epoch": 0.2148905109489051, "grad_norm": 1.3497804127584312, "learning_rate": 9.130559566954191e-06, "loss": 0.4401, "step": 2208 }, { "epoch": 0.21498783454987835, "grad_norm": 1.6094605506454212, "learning_rate": 9.129671188665661e-06, "loss": 0.5943, "step": 2209 }, { "epoch": 0.21508515815085158, "grad_norm": 1.7393737788578179, "learning_rate": 9.128782400009167e-06, "loss": 0.6832, "step": 2210 }, { "epoch": 0.2151824817518248, "grad_norm": 1.2888456219960003, "learning_rate": 9.127893201073028e-06, "loss": 0.4449, "step": 2211 }, { "epoch": 0.21527980535279806, "grad_norm": 1.6231451452368957, "learning_rate": 9.127003591945605e-06, "loss": 0.6579, "step": 2212 }, { "epoch": 0.21537712895377129, "grad_norm": 1.4013330754504585, "learning_rate": 9.126113572715296e-06, "loss": 0.5072, "step": 2213 }, { "epoch": 0.21547445255474454, "grad_norm": 1.1928349667862592, "learning_rate": 9.125223143470547e-06, "loss": 0.2896, "step": 2214 }, { "epoch": 0.21557177615571776, "grad_norm": 1.3027255903002162, "learning_rate": 9.124332304299838e-06, "loss": 0.3076, "step": 2215 }, { "epoch": 0.215669099756691, "grad_norm": 1.6527022746103417, "learning_rate": 9.123441055291694e-06, "loss": 0.4688, "step": 2216 }, { "epoch": 0.21576642335766424, "grad_norm": 1.3197927862863625, "learning_rate": 9.122549396534676e-06, "loss": 0.318, "step": 2217 }, { "epoch": 0.21586374695863747, "grad_norm": 1.5297610770776902, "learning_rate": 9.121657328117392e-06, "loss": 0.6176, "step": 2218 }, { "epoch": 0.2159610705596107, "grad_norm": 1.338041823259507, "learning_rate": 9.120764850128486e-06, "loss": 0.3941, "step": 2219 }, { "epoch": 0.21605839416058395, "grad_norm": 1.200858421054794, "learning_rate": 9.119871962656644e-06, "loss": 0.3758, "step": 2220 }, { "epoch": 0.21615571776155718, "grad_norm": 1.5023816592412242, "learning_rate": 9.118978665790592e-06, "loss": 0.5032, "step": 2221 }, { "epoch": 0.2162530413625304, "grad_norm": 1.2258656459952086, "learning_rate": 9.118084959619099e-06, "loss": 0.4489, "step": 2222 }, { "epoch": 0.21635036496350366, "grad_norm": 1.717063075964899, "learning_rate": 9.117190844230971e-06, "loss": 0.7762, "step": 2223 }, { "epoch": 0.21644768856447688, "grad_norm": 1.210140433555958, "learning_rate": 9.11629631971506e-06, "loss": 0.4431, "step": 2224 }, { "epoch": 0.21654501216545013, "grad_norm": 1.4188251693910732, "learning_rate": 9.115401386160252e-06, "loss": 0.3495, "step": 2225 }, { "epoch": 0.21664233576642336, "grad_norm": 2.073136961715272, "learning_rate": 9.11450604365548e-06, "loss": 0.4268, "step": 2226 }, { "epoch": 0.2167396593673966, "grad_norm": 1.5265588328884594, "learning_rate": 9.113610292289714e-06, "loss": 0.4303, "step": 2227 }, { "epoch": 0.21683698296836984, "grad_norm": 1.3220401272995868, "learning_rate": 9.112714132151963e-06, "loss": 0.4221, "step": 2228 }, { "epoch": 0.21693430656934307, "grad_norm": 1.4088441022230214, "learning_rate": 9.111817563331282e-06, "loss": 0.1886, "step": 2229 }, { "epoch": 0.2170316301703163, "grad_norm": 1.3947572498286958, "learning_rate": 9.110920585916763e-06, "loss": 0.353, "step": 2230 }, { "epoch": 0.21712895377128955, "grad_norm": 1.2369368803593181, "learning_rate": 9.110023199997537e-06, "loss": 0.2576, "step": 2231 }, { "epoch": 0.21722627737226277, "grad_norm": 1.1860471672244592, "learning_rate": 9.10912540566278e-06, "loss": 0.3994, "step": 2232 }, { "epoch": 0.217323600973236, "grad_norm": 1.309576411449957, "learning_rate": 9.108227203001708e-06, "loss": 0.4453, "step": 2233 }, { "epoch": 0.21742092457420925, "grad_norm": 1.6554896930775824, "learning_rate": 9.10732859210357e-06, "loss": 0.589, "step": 2234 }, { "epoch": 0.21751824817518248, "grad_norm": 1.761859219992272, "learning_rate": 9.106429573057666e-06, "loss": 0.726, "step": 2235 }, { "epoch": 0.21761557177615573, "grad_norm": 1.35833156484165, "learning_rate": 9.105530145953335e-06, "loss": 0.4012, "step": 2236 }, { "epoch": 0.21771289537712896, "grad_norm": 3.4502529438559884, "learning_rate": 9.104630310879944e-06, "loss": 0.4621, "step": 2237 }, { "epoch": 0.21781021897810218, "grad_norm": 1.3357957463599541, "learning_rate": 9.103730067926922e-06, "loss": 0.317, "step": 2238 }, { "epoch": 0.21790754257907544, "grad_norm": 1.3566642568052916, "learning_rate": 9.102829417183716e-06, "loss": 0.4245, "step": 2239 }, { "epoch": 0.21800486618004866, "grad_norm": 1.673808040965782, "learning_rate": 9.10192835873983e-06, "loss": 0.6908, "step": 2240 }, { "epoch": 0.2181021897810219, "grad_norm": 1.8194308130790637, "learning_rate": 9.101026892684804e-06, "loss": 0.5157, "step": 2241 }, { "epoch": 0.21819951338199514, "grad_norm": 1.4443029228393756, "learning_rate": 9.100125019108214e-06, "loss": 0.5417, "step": 2242 }, { "epoch": 0.21829683698296837, "grad_norm": 1.4594341846039764, "learning_rate": 9.099222738099682e-06, "loss": 0.4297, "step": 2243 }, { "epoch": 0.2183941605839416, "grad_norm": 1.3121064822320374, "learning_rate": 9.098320049748864e-06, "loss": 0.4646, "step": 2244 }, { "epoch": 0.21849148418491485, "grad_norm": 1.5596348242175504, "learning_rate": 9.097416954145467e-06, "loss": 0.4877, "step": 2245 }, { "epoch": 0.21858880778588807, "grad_norm": 1.1835003302943965, "learning_rate": 9.096513451379225e-06, "loss": 0.3548, "step": 2246 }, { "epoch": 0.21868613138686133, "grad_norm": 1.4956699498169375, "learning_rate": 9.095609541539925e-06, "loss": 0.3958, "step": 2247 }, { "epoch": 0.21878345498783455, "grad_norm": 1.3761247023142853, "learning_rate": 9.094705224717388e-06, "loss": 0.4076, "step": 2248 }, { "epoch": 0.21888077858880778, "grad_norm": 1.2940624946938768, "learning_rate": 9.093800501001476e-06, "loss": 0.4989, "step": 2249 }, { "epoch": 0.21897810218978103, "grad_norm": 1.1389229499303237, "learning_rate": 9.092895370482091e-06, "loss": 0.332, "step": 2250 }, { "epoch": 0.21907542579075426, "grad_norm": 1.5338979130860617, "learning_rate": 9.091989833249179e-06, "loss": 0.5609, "step": 2251 }, { "epoch": 0.21917274939172748, "grad_norm": 1.3736786128370664, "learning_rate": 9.091083889392721e-06, "loss": 0.3767, "step": 2252 }, { "epoch": 0.21927007299270074, "grad_norm": 1.6001218689759074, "learning_rate": 9.090177539002743e-06, "loss": 0.5709, "step": 2253 }, { "epoch": 0.21936739659367396, "grad_norm": 1.2578364778685514, "learning_rate": 9.089270782169308e-06, "loss": 0.3796, "step": 2254 }, { "epoch": 0.2194647201946472, "grad_norm": 1.5508865589735865, "learning_rate": 9.088363618982523e-06, "loss": 0.5947, "step": 2255 }, { "epoch": 0.21956204379562044, "grad_norm": 1.2646857650137902, "learning_rate": 9.08745604953253e-06, "loss": 0.3024, "step": 2256 }, { "epoch": 0.21965936739659367, "grad_norm": 1.1168071392771144, "learning_rate": 9.08654807390952e-06, "loss": 0.3113, "step": 2257 }, { "epoch": 0.21975669099756692, "grad_norm": 1.238369237619726, "learning_rate": 9.085639692203713e-06, "loss": 0.2179, "step": 2258 }, { "epoch": 0.21985401459854015, "grad_norm": 1.2485790759653945, "learning_rate": 9.084730904505381e-06, "loss": 0.3763, "step": 2259 }, { "epoch": 0.21995133819951337, "grad_norm": 1.6082877032407055, "learning_rate": 9.083821710904827e-06, "loss": 0.3831, "step": 2260 }, { "epoch": 0.22004866180048663, "grad_norm": 1.3213256018887491, "learning_rate": 9.082912111492401e-06, "loss": 0.4091, "step": 2261 }, { "epoch": 0.22014598540145985, "grad_norm": 1.5899440724355371, "learning_rate": 9.08200210635849e-06, "loss": 0.4491, "step": 2262 }, { "epoch": 0.22024330900243308, "grad_norm": 1.30089465497526, "learning_rate": 9.081091695593518e-06, "loss": 0.3762, "step": 2263 }, { "epoch": 0.22034063260340633, "grad_norm": 1.5403984971127525, "learning_rate": 9.080180879287957e-06, "loss": 0.438, "step": 2264 }, { "epoch": 0.22043795620437956, "grad_norm": 1.5500984898931875, "learning_rate": 9.079269657532312e-06, "loss": 0.398, "step": 2265 }, { "epoch": 0.22053527980535279, "grad_norm": 1.4834461719298844, "learning_rate": 9.078358030417136e-06, "loss": 0.6175, "step": 2266 }, { "epoch": 0.22063260340632604, "grad_norm": 1.3553003212010182, "learning_rate": 9.077445998033015e-06, "loss": 0.2719, "step": 2267 }, { "epoch": 0.22072992700729926, "grad_norm": 1.573783871238475, "learning_rate": 9.07653356047058e-06, "loss": 0.2328, "step": 2268 }, { "epoch": 0.22082725060827252, "grad_norm": 1.54928316645126, "learning_rate": 9.075620717820498e-06, "loss": 0.3514, "step": 2269 }, { "epoch": 0.22092457420924574, "grad_norm": 1.3616253433976528, "learning_rate": 9.07470747017348e-06, "loss": 0.4636, "step": 2270 }, { "epoch": 0.22102189781021897, "grad_norm": 1.6741713680481711, "learning_rate": 9.073793817620277e-06, "loss": 0.6321, "step": 2271 }, { "epoch": 0.22111922141119222, "grad_norm": 1.3794305685281492, "learning_rate": 9.07287976025168e-06, "loss": 0.3172, "step": 2272 }, { "epoch": 0.22121654501216545, "grad_norm": 1.362894347632133, "learning_rate": 9.071965298158516e-06, "loss": 0.3989, "step": 2273 }, { "epoch": 0.22131386861313868, "grad_norm": 1.4233131262232992, "learning_rate": 9.071050431431658e-06, "loss": 0.4922, "step": 2274 }, { "epoch": 0.22141119221411193, "grad_norm": 1.4905332812995968, "learning_rate": 9.070135160162016e-06, "loss": 0.3952, "step": 2275 }, { "epoch": 0.22150851581508516, "grad_norm": 1.4389307945528345, "learning_rate": 9.069219484440541e-06, "loss": 0.4364, "step": 2276 }, { "epoch": 0.22160583941605838, "grad_norm": 1.4796907096594347, "learning_rate": 9.068303404358226e-06, "loss": 0.4842, "step": 2277 }, { "epoch": 0.22170316301703163, "grad_norm": 1.6561415294899449, "learning_rate": 9.0673869200061e-06, "loss": 0.5595, "step": 2278 }, { "epoch": 0.22180048661800486, "grad_norm": 1.4198474890784685, "learning_rate": 9.066470031475236e-06, "loss": 0.4762, "step": 2279 }, { "epoch": 0.22189781021897811, "grad_norm": 1.437724469115563, "learning_rate": 9.065552738856745e-06, "loss": 0.3687, "step": 2280 }, { "epoch": 0.22199513381995134, "grad_norm": 1.2431258010669888, "learning_rate": 9.06463504224178e-06, "loss": 0.3854, "step": 2281 }, { "epoch": 0.22209245742092457, "grad_norm": 1.362042407967867, "learning_rate": 9.063716941721534e-06, "loss": 0.3981, "step": 2282 }, { "epoch": 0.22218978102189782, "grad_norm": 1.3260780267557537, "learning_rate": 9.062798437387236e-06, "loss": 0.4304, "step": 2283 }, { "epoch": 0.22228710462287105, "grad_norm": 1.2009742636293355, "learning_rate": 9.06187952933016e-06, "loss": 0.3441, "step": 2284 }, { "epoch": 0.22238442822384427, "grad_norm": 1.7089934430562992, "learning_rate": 9.060960217641618e-06, "loss": 0.3488, "step": 2285 }, { "epoch": 0.22248175182481753, "grad_norm": 1.3539106224768682, "learning_rate": 9.060040502412965e-06, "loss": 0.3617, "step": 2286 }, { "epoch": 0.22257907542579075, "grad_norm": 1.3952537396094973, "learning_rate": 9.05912038373559e-06, "loss": 0.4507, "step": 2287 }, { "epoch": 0.22267639902676398, "grad_norm": 1.201207552744405, "learning_rate": 9.058199861700928e-06, "loss": 0.3074, "step": 2288 }, { "epoch": 0.22277372262773723, "grad_norm": 1.1918182161083974, "learning_rate": 9.057278936400453e-06, "loss": 0.3713, "step": 2289 }, { "epoch": 0.22287104622871046, "grad_norm": 1.5864015097741249, "learning_rate": 9.056357607925674e-06, "loss": 0.4651, "step": 2290 }, { "epoch": 0.2229683698296837, "grad_norm": 1.0855034708664277, "learning_rate": 9.055435876368148e-06, "loss": 0.2361, "step": 2291 }, { "epoch": 0.22306569343065694, "grad_norm": 1.1945153364440069, "learning_rate": 9.054513741819466e-06, "loss": 0.2803, "step": 2292 }, { "epoch": 0.22316301703163016, "grad_norm": 1.3734264039165323, "learning_rate": 9.053591204371262e-06, "loss": 0.3709, "step": 2293 }, { "epoch": 0.22326034063260342, "grad_norm": 1.662571628719731, "learning_rate": 9.052668264115206e-06, "loss": 0.6615, "step": 2294 }, { "epoch": 0.22335766423357664, "grad_norm": 1.4371203045482563, "learning_rate": 9.051744921143015e-06, "loss": 0.4082, "step": 2295 }, { "epoch": 0.22345498783454987, "grad_norm": 1.5571182647752952, "learning_rate": 9.050821175546442e-06, "loss": 0.5338, "step": 2296 }, { "epoch": 0.22355231143552312, "grad_norm": 1.4022335338581293, "learning_rate": 9.049897027417277e-06, "loss": 0.3933, "step": 2297 }, { "epoch": 0.22364963503649635, "grad_norm": 1.2815006290096387, "learning_rate": 9.048972476847356e-06, "loss": 0.4662, "step": 2298 }, { "epoch": 0.22374695863746957, "grad_norm": 1.4344706750679865, "learning_rate": 9.04804752392855e-06, "loss": 0.4422, "step": 2299 }, { "epoch": 0.22384428223844283, "grad_norm": 1.2984999163116793, "learning_rate": 9.047122168752775e-06, "loss": 0.3659, "step": 2300 }, { "epoch": 0.22394160583941605, "grad_norm": 1.1587669196843096, "learning_rate": 9.046196411411982e-06, "loss": 0.2974, "step": 2301 }, { "epoch": 0.2240389294403893, "grad_norm": 2.322228254141064, "learning_rate": 9.045270251998166e-06, "loss": 0.5667, "step": 2302 }, { "epoch": 0.22413625304136253, "grad_norm": 1.5137300738559605, "learning_rate": 9.044343690603358e-06, "loss": 0.3889, "step": 2303 }, { "epoch": 0.22423357664233576, "grad_norm": 1.472679239189759, "learning_rate": 9.04341672731963e-06, "loss": 0.4875, "step": 2304 }, { "epoch": 0.224330900243309, "grad_norm": 1.391957619608358, "learning_rate": 9.042489362239097e-06, "loss": 0.4513, "step": 2305 }, { "epoch": 0.22442822384428224, "grad_norm": 1.5752423841676473, "learning_rate": 9.041561595453914e-06, "loss": 0.6021, "step": 2306 }, { "epoch": 0.22452554744525546, "grad_norm": 1.340696458312585, "learning_rate": 9.040633427056268e-06, "loss": 0.36, "step": 2307 }, { "epoch": 0.22462287104622872, "grad_norm": 1.319309191993897, "learning_rate": 9.039704857138396e-06, "loss": 0.2632, "step": 2308 }, { "epoch": 0.22472019464720194, "grad_norm": 1.3567748798839634, "learning_rate": 9.03877588579257e-06, "loss": 0.4085, "step": 2309 }, { "epoch": 0.22481751824817517, "grad_norm": 1.7234931003044007, "learning_rate": 9.0378465131111e-06, "loss": 0.5366, "step": 2310 }, { "epoch": 0.22491484184914842, "grad_norm": 1.3431964443797024, "learning_rate": 9.036916739186341e-06, "loss": 0.3406, "step": 2311 }, { "epoch": 0.22501216545012165, "grad_norm": 1.6143507102825565, "learning_rate": 9.035986564110685e-06, "loss": 0.6322, "step": 2312 }, { "epoch": 0.2251094890510949, "grad_norm": 1.421713348254314, "learning_rate": 9.035055987976563e-06, "loss": 0.3963, "step": 2313 }, { "epoch": 0.22520681265206813, "grad_norm": 1.5860325075452377, "learning_rate": 9.034125010876447e-06, "loss": 0.4722, "step": 2314 }, { "epoch": 0.22530413625304135, "grad_norm": 1.633700480684755, "learning_rate": 9.03319363290285e-06, "loss": 0.2649, "step": 2315 }, { "epoch": 0.2254014598540146, "grad_norm": 1.5598775600409591, "learning_rate": 9.03226185414832e-06, "loss": 0.4778, "step": 2316 }, { "epoch": 0.22549878345498783, "grad_norm": 1.4413798673536165, "learning_rate": 9.031329674705455e-06, "loss": 0.3182, "step": 2317 }, { "epoch": 0.22559610705596106, "grad_norm": 1.437989358950148, "learning_rate": 9.03039709466688e-06, "loss": 0.4297, "step": 2318 }, { "epoch": 0.2256934306569343, "grad_norm": 1.3355568683760275, "learning_rate": 9.029464114125267e-06, "loss": 0.3393, "step": 2319 }, { "epoch": 0.22579075425790754, "grad_norm": 1.353161962413978, "learning_rate": 9.028530733173332e-06, "loss": 0.3362, "step": 2320 }, { "epoch": 0.22588807785888076, "grad_norm": 1.1699742479017108, "learning_rate": 9.027596951903819e-06, "loss": 0.3674, "step": 2321 }, { "epoch": 0.22598540145985402, "grad_norm": 1.1235278882417843, "learning_rate": 9.026662770409524e-06, "loss": 0.3209, "step": 2322 }, { "epoch": 0.22608272506082724, "grad_norm": 1.4951135995374567, "learning_rate": 9.025728188783273e-06, "loss": 0.4297, "step": 2323 }, { "epoch": 0.2261800486618005, "grad_norm": 1.3046514997255336, "learning_rate": 9.024793207117937e-06, "loss": 0.3765, "step": 2324 }, { "epoch": 0.22627737226277372, "grad_norm": 1.3346554142143854, "learning_rate": 9.023857825506426e-06, "loss": 0.5228, "step": 2325 }, { "epoch": 0.22637469586374695, "grad_norm": 1.4309619163867682, "learning_rate": 9.022922044041691e-06, "loss": 0.4605, "step": 2326 }, { "epoch": 0.2264720194647202, "grad_norm": 1.5152634651556307, "learning_rate": 9.021985862816718e-06, "loss": 0.5553, "step": 2327 }, { "epoch": 0.22656934306569343, "grad_norm": 1.3885182055556289, "learning_rate": 9.02104928192454e-06, "loss": 0.4831, "step": 2328 }, { "epoch": 0.22666666666666666, "grad_norm": 1.2729317064328092, "learning_rate": 9.020112301458221e-06, "loss": 0.4314, "step": 2329 }, { "epoch": 0.2267639902676399, "grad_norm": 0.9679503678492228, "learning_rate": 9.019174921510874e-06, "loss": 0.1925, "step": 2330 }, { "epoch": 0.22686131386861313, "grad_norm": 1.4513146393120597, "learning_rate": 9.018237142175643e-06, "loss": 0.5487, "step": 2331 }, { "epoch": 0.2269586374695864, "grad_norm": 1.5377065039176208, "learning_rate": 9.017298963545718e-06, "loss": 0.4063, "step": 2332 }, { "epoch": 0.22705596107055961, "grad_norm": 1.0180180453516632, "learning_rate": 9.016360385714324e-06, "loss": 0.2101, "step": 2333 }, { "epoch": 0.22715328467153284, "grad_norm": 1.3145676629552665, "learning_rate": 9.015421408774732e-06, "loss": 0.4575, "step": 2334 }, { "epoch": 0.2272506082725061, "grad_norm": 1.3213351651174845, "learning_rate": 9.014482032820247e-06, "loss": 0.3924, "step": 2335 }, { "epoch": 0.22734793187347932, "grad_norm": 1.9370834148842127, "learning_rate": 9.013542257944212e-06, "loss": 0.4332, "step": 2336 }, { "epoch": 0.22744525547445255, "grad_norm": 1.4754695985325648, "learning_rate": 9.012602084240018e-06, "loss": 0.4014, "step": 2337 }, { "epoch": 0.2275425790754258, "grad_norm": 1.1124893316550342, "learning_rate": 9.011661511801088e-06, "loss": 0.2957, "step": 2338 }, { "epoch": 0.22763990267639903, "grad_norm": 1.2537185195667433, "learning_rate": 9.010720540720888e-06, "loss": 0.3004, "step": 2339 }, { "epoch": 0.22773722627737225, "grad_norm": 1.4597689601256807, "learning_rate": 9.009779171092923e-06, "loss": 0.2555, "step": 2340 }, { "epoch": 0.2278345498783455, "grad_norm": 1.4737791439989423, "learning_rate": 9.008837403010736e-06, "loss": 0.5355, "step": 2341 }, { "epoch": 0.22793187347931873, "grad_norm": 1.3795639069131398, "learning_rate": 9.007895236567913e-06, "loss": 0.3961, "step": 2342 }, { "epoch": 0.22802919708029198, "grad_norm": 1.6364796903185053, "learning_rate": 9.006952671858078e-06, "loss": 0.444, "step": 2343 }, { "epoch": 0.2281265206812652, "grad_norm": 1.1964346909925698, "learning_rate": 9.006009708974892e-06, "loss": 0.3297, "step": 2344 }, { "epoch": 0.22822384428223844, "grad_norm": 1.343808771666808, "learning_rate": 9.00506634801206e-06, "loss": 0.4537, "step": 2345 }, { "epoch": 0.2283211678832117, "grad_norm": 1.4003110727355261, "learning_rate": 9.004122589063323e-06, "loss": 0.3883, "step": 2346 }, { "epoch": 0.22841849148418492, "grad_norm": 1.2435101838594087, "learning_rate": 9.003178432222462e-06, "loss": 0.4238, "step": 2347 }, { "epoch": 0.22851581508515814, "grad_norm": 1.324643227390155, "learning_rate": 9.0022338775833e-06, "loss": 0.4139, "step": 2348 }, { "epoch": 0.2286131386861314, "grad_norm": 1.7692069120616638, "learning_rate": 9.001288925239698e-06, "loss": 0.4719, "step": 2349 }, { "epoch": 0.22871046228710462, "grad_norm": 1.223562422765287, "learning_rate": 9.000343575285555e-06, "loss": 0.3256, "step": 2350 }, { "epoch": 0.22880778588807785, "grad_norm": 1.3407025045830592, "learning_rate": 8.999397827814812e-06, "loss": 0.3788, "step": 2351 }, { "epoch": 0.2289051094890511, "grad_norm": 1.5281139100341292, "learning_rate": 8.99845168292145e-06, "loss": 0.5565, "step": 2352 }, { "epoch": 0.22900243309002433, "grad_norm": 1.560155712083658, "learning_rate": 8.997505140699488e-06, "loss": 0.4957, "step": 2353 }, { "epoch": 0.22909975669099758, "grad_norm": 1.290422773797366, "learning_rate": 8.996558201242981e-06, "loss": 0.4011, "step": 2354 }, { "epoch": 0.2291970802919708, "grad_norm": 1.2847680894150124, "learning_rate": 8.99561086464603e-06, "loss": 0.4419, "step": 2355 }, { "epoch": 0.22929440389294403, "grad_norm": 1.4625413220428547, "learning_rate": 8.99466313100277e-06, "loss": 0.2511, "step": 2356 }, { "epoch": 0.22939172749391729, "grad_norm": 1.2882840667194135, "learning_rate": 8.99371500040738e-06, "loss": 0.3992, "step": 2357 }, { "epoch": 0.2294890510948905, "grad_norm": 1.1997126453782205, "learning_rate": 8.992766472954077e-06, "loss": 0.2639, "step": 2358 }, { "epoch": 0.22958637469586374, "grad_norm": 1.6688893120724655, "learning_rate": 8.991817548737114e-06, "loss": 0.3103, "step": 2359 }, { "epoch": 0.229683698296837, "grad_norm": 1.4031771252981649, "learning_rate": 8.990868227850788e-06, "loss": 0.4245, "step": 2360 }, { "epoch": 0.22978102189781022, "grad_norm": 1.4825462721346627, "learning_rate": 8.989918510389432e-06, "loss": 0.3973, "step": 2361 }, { "epoch": 0.22987834549878344, "grad_norm": 1.7756990641125774, "learning_rate": 8.988968396447424e-06, "loss": 0.6091, "step": 2362 }, { "epoch": 0.2299756690997567, "grad_norm": 1.5519381803018173, "learning_rate": 8.988017886119172e-06, "loss": 0.5849, "step": 2363 }, { "epoch": 0.23007299270072992, "grad_norm": 1.5288537407748173, "learning_rate": 8.987066979499133e-06, "loss": 0.594, "step": 2364 }, { "epoch": 0.23017031630170318, "grad_norm": 1.2519254160654887, "learning_rate": 8.986115676681797e-06, "loss": 0.3781, "step": 2365 }, { "epoch": 0.2302676399026764, "grad_norm": 1.2118409754918265, "learning_rate": 8.985163977761697e-06, "loss": 0.3761, "step": 2366 }, { "epoch": 0.23036496350364963, "grad_norm": 1.3123505825187787, "learning_rate": 8.984211882833402e-06, "loss": 0.405, "step": 2367 }, { "epoch": 0.23046228710462288, "grad_norm": 1.6027642184293107, "learning_rate": 8.983259391991524e-06, "loss": 0.597, "step": 2368 }, { "epoch": 0.2305596107055961, "grad_norm": 1.3646497443348367, "learning_rate": 8.982306505330712e-06, "loss": 0.4036, "step": 2369 }, { "epoch": 0.23065693430656933, "grad_norm": 1.2894115553392402, "learning_rate": 8.981353222945653e-06, "loss": 0.2778, "step": 2370 }, { "epoch": 0.2307542579075426, "grad_norm": 1.27883786418869, "learning_rate": 8.98039954493108e-06, "loss": 0.3803, "step": 2371 }, { "epoch": 0.2308515815085158, "grad_norm": 1.5863647637061415, "learning_rate": 8.979445471381755e-06, "loss": 0.4716, "step": 2372 }, { "epoch": 0.23094890510948904, "grad_norm": 1.1874137646332688, "learning_rate": 8.97849100239249e-06, "loss": 0.2846, "step": 2373 }, { "epoch": 0.2310462287104623, "grad_norm": 1.6936318641369774, "learning_rate": 8.977536138058126e-06, "loss": 0.3418, "step": 2374 }, { "epoch": 0.23114355231143552, "grad_norm": 1.0526167143851337, "learning_rate": 8.976580878473553e-06, "loss": 0.258, "step": 2375 }, { "epoch": 0.23124087591240877, "grad_norm": 1.753799809070063, "learning_rate": 8.975625223733693e-06, "loss": 0.4764, "step": 2376 }, { "epoch": 0.231338199513382, "grad_norm": 1.3814482775299988, "learning_rate": 8.97466917393351e-06, "loss": 0.3811, "step": 2377 }, { "epoch": 0.23143552311435522, "grad_norm": 1.575424754678499, "learning_rate": 8.97371272916801e-06, "loss": 0.5028, "step": 2378 }, { "epoch": 0.23153284671532848, "grad_norm": 1.5163540217481704, "learning_rate": 8.972755889532234e-06, "loss": 0.4055, "step": 2379 }, { "epoch": 0.2316301703163017, "grad_norm": 1.1877796947964157, "learning_rate": 8.971798655121264e-06, "loss": 0.2978, "step": 2380 }, { "epoch": 0.23172749391727493, "grad_norm": 1.6274909221671408, "learning_rate": 8.970841026030218e-06, "loss": 0.4319, "step": 2381 }, { "epoch": 0.23182481751824818, "grad_norm": 1.413480143472021, "learning_rate": 8.969883002354259e-06, "loss": 0.4015, "step": 2382 }, { "epoch": 0.2319221411192214, "grad_norm": 1.451327617189514, "learning_rate": 8.968924584188587e-06, "loss": 0.5107, "step": 2383 }, { "epoch": 0.23201946472019463, "grad_norm": 1.4288160659587352, "learning_rate": 8.96796577162844e-06, "loss": 0.369, "step": 2384 }, { "epoch": 0.2321167883211679, "grad_norm": 1.6469132304956866, "learning_rate": 8.967006564769094e-06, "loss": 0.5982, "step": 2385 }, { "epoch": 0.23221411192214111, "grad_norm": 1.4887239693800984, "learning_rate": 8.966046963705869e-06, "loss": 0.4967, "step": 2386 }, { "epoch": 0.23231143552311437, "grad_norm": 1.2469481884120308, "learning_rate": 8.965086968534116e-06, "loss": 0.4022, "step": 2387 }, { "epoch": 0.2324087591240876, "grad_norm": 2.6320603198934047, "learning_rate": 8.964126579349237e-06, "loss": 0.2489, "step": 2388 }, { "epoch": 0.23250608272506082, "grad_norm": 1.2339093742509784, "learning_rate": 8.963165796246663e-06, "loss": 0.3694, "step": 2389 }, { "epoch": 0.23260340632603407, "grad_norm": 1.4634162966788549, "learning_rate": 8.962204619321866e-06, "loss": 0.5646, "step": 2390 }, { "epoch": 0.2327007299270073, "grad_norm": 1.2919651066139786, "learning_rate": 8.961243048670363e-06, "loss": 0.3833, "step": 2391 }, { "epoch": 0.23279805352798053, "grad_norm": 1.5273773111622013, "learning_rate": 8.960281084387701e-06, "loss": 0.5724, "step": 2392 }, { "epoch": 0.23289537712895378, "grad_norm": 1.4704498843019616, "learning_rate": 8.959318726569475e-06, "loss": 0.5232, "step": 2393 }, { "epoch": 0.232992700729927, "grad_norm": 1.52947786509823, "learning_rate": 8.958355975311314e-06, "loss": 0.5014, "step": 2394 }, { "epoch": 0.23309002433090023, "grad_norm": 1.457234959002331, "learning_rate": 8.957392830708886e-06, "loss": 0.5401, "step": 2395 }, { "epoch": 0.23318734793187348, "grad_norm": 1.5878948291380384, "learning_rate": 8.9564292928579e-06, "loss": 0.4481, "step": 2396 }, { "epoch": 0.2332846715328467, "grad_norm": 1.3353181262068508, "learning_rate": 8.955465361854103e-06, "loss": 0.3668, "step": 2397 }, { "epoch": 0.23338199513381996, "grad_norm": 2.023729457927684, "learning_rate": 8.954501037793282e-06, "loss": 0.256, "step": 2398 }, { "epoch": 0.2334793187347932, "grad_norm": 1.3501136378744423, "learning_rate": 8.953536320771264e-06, "loss": 0.4288, "step": 2399 }, { "epoch": 0.23357664233576642, "grad_norm": 0.9695156209886321, "learning_rate": 8.95257121088391e-06, "loss": 0.3313, "step": 2400 }, { "epoch": 0.23367396593673967, "grad_norm": 1.6268089203048999, "learning_rate": 8.951605708227125e-06, "loss": 0.5031, "step": 2401 }, { "epoch": 0.2337712895377129, "grad_norm": 1.3327356528771297, "learning_rate": 8.950639812896852e-06, "loss": 0.352, "step": 2402 }, { "epoch": 0.23386861313868612, "grad_norm": 1.646158604731562, "learning_rate": 8.949673524989074e-06, "loss": 0.6143, "step": 2403 }, { "epoch": 0.23396593673965937, "grad_norm": 1.4459398712277267, "learning_rate": 8.948706844599809e-06, "loss": 0.301, "step": 2404 }, { "epoch": 0.2340632603406326, "grad_norm": 1.242464142709881, "learning_rate": 8.947739771825118e-06, "loss": 0.3867, "step": 2405 }, { "epoch": 0.23416058394160583, "grad_norm": 1.283369590610404, "learning_rate": 8.946772306761099e-06, "loss": 0.3396, "step": 2406 }, { "epoch": 0.23425790754257908, "grad_norm": 1.659051576981879, "learning_rate": 8.94580444950389e-06, "loss": 0.2985, "step": 2407 }, { "epoch": 0.2343552311435523, "grad_norm": 1.5811557183787177, "learning_rate": 8.944836200149669e-06, "loss": 0.5412, "step": 2408 }, { "epoch": 0.23445255474452556, "grad_norm": 1.5937284580345608, "learning_rate": 8.943867558794648e-06, "loss": 0.4562, "step": 2409 }, { "epoch": 0.23454987834549879, "grad_norm": 1.179539450140548, "learning_rate": 8.942898525535085e-06, "loss": 0.2436, "step": 2410 }, { "epoch": 0.234647201946472, "grad_norm": 1.2115140465312926, "learning_rate": 8.941929100467272e-06, "loss": 0.325, "step": 2411 }, { "epoch": 0.23474452554744527, "grad_norm": 1.3228525862104779, "learning_rate": 8.94095928368754e-06, "loss": 0.4001, "step": 2412 }, { "epoch": 0.2348418491484185, "grad_norm": 1.5093562470528878, "learning_rate": 8.939989075292263e-06, "loss": 0.3554, "step": 2413 }, { "epoch": 0.23493917274939172, "grad_norm": 1.629660086616085, "learning_rate": 8.93901847537785e-06, "loss": 0.6349, "step": 2414 }, { "epoch": 0.23503649635036497, "grad_norm": 1.0826348229158524, "learning_rate": 8.938047484040749e-06, "loss": 0.2681, "step": 2415 }, { "epoch": 0.2351338199513382, "grad_norm": 1.2841520241198179, "learning_rate": 8.93707610137745e-06, "loss": 0.4081, "step": 2416 }, { "epoch": 0.23523114355231142, "grad_norm": 1.913465881785096, "learning_rate": 8.936104327484479e-06, "loss": 0.7043, "step": 2417 }, { "epoch": 0.23532846715328468, "grad_norm": 1.386306701477425, "learning_rate": 8.935132162458401e-06, "loss": 0.341, "step": 2418 }, { "epoch": 0.2354257907542579, "grad_norm": 1.1278547518059516, "learning_rate": 8.934159606395821e-06, "loss": 0.3151, "step": 2419 }, { "epoch": 0.23552311435523116, "grad_norm": 1.5540265542588236, "learning_rate": 8.933186659393384e-06, "loss": 0.6514, "step": 2420 }, { "epoch": 0.23562043795620438, "grad_norm": 1.278787339635804, "learning_rate": 8.932213321547769e-06, "loss": 0.3423, "step": 2421 }, { "epoch": 0.2357177615571776, "grad_norm": 1.2885094583361822, "learning_rate": 8.931239592955701e-06, "loss": 0.2958, "step": 2422 }, { "epoch": 0.23581508515815086, "grad_norm": 1.5181901598500283, "learning_rate": 8.930265473713939e-06, "loss": 0.4212, "step": 2423 }, { "epoch": 0.2359124087591241, "grad_norm": 1.2136160482551297, "learning_rate": 8.92929096391928e-06, "loss": 0.3982, "step": 2424 }, { "epoch": 0.2360097323600973, "grad_norm": 1.5487072814518004, "learning_rate": 8.928316063668562e-06, "loss": 0.5676, "step": 2425 }, { "epoch": 0.23610705596107057, "grad_norm": 1.430432818475582, "learning_rate": 8.927340773058664e-06, "loss": 0.4735, "step": 2426 }, { "epoch": 0.2362043795620438, "grad_norm": 1.4586841524588252, "learning_rate": 8.926365092186498e-06, "loss": 0.5637, "step": 2427 }, { "epoch": 0.23630170316301702, "grad_norm": 1.5364014523424565, "learning_rate": 8.92538902114902e-06, "loss": 0.4783, "step": 2428 }, { "epoch": 0.23639902676399027, "grad_norm": 1.3896600182614345, "learning_rate": 8.924412560043223e-06, "loss": 0.3748, "step": 2429 }, { "epoch": 0.2364963503649635, "grad_norm": 1.304447540327908, "learning_rate": 8.923435708966135e-06, "loss": 0.3373, "step": 2430 }, { "epoch": 0.23659367396593675, "grad_norm": 1.3383082719469825, "learning_rate": 8.922458468014833e-06, "loss": 0.3089, "step": 2431 }, { "epoch": 0.23669099756690998, "grad_norm": 1.4376693294142868, "learning_rate": 8.921480837286418e-06, "loss": 0.2665, "step": 2432 }, { "epoch": 0.2367883211678832, "grad_norm": 1.3948368197200884, "learning_rate": 8.920502816878045e-06, "loss": 0.4349, "step": 2433 }, { "epoch": 0.23688564476885646, "grad_norm": 1.5583938814663865, "learning_rate": 8.919524406886897e-06, "loss": 0.4528, "step": 2434 }, { "epoch": 0.23698296836982968, "grad_norm": 1.455016515054737, "learning_rate": 8.918545607410199e-06, "loss": 0.416, "step": 2435 }, { "epoch": 0.2370802919708029, "grad_norm": 1.5707414335423742, "learning_rate": 8.917566418545215e-06, "loss": 0.4269, "step": 2436 }, { "epoch": 0.23717761557177616, "grad_norm": 1.6214497738286784, "learning_rate": 8.916586840389248e-06, "loss": 0.5531, "step": 2437 }, { "epoch": 0.2372749391727494, "grad_norm": 1.5231468510302828, "learning_rate": 8.91560687303964e-06, "loss": 0.5464, "step": 2438 }, { "epoch": 0.23737226277372261, "grad_norm": 1.5631657517225734, "learning_rate": 8.91462651659377e-06, "loss": 0.4098, "step": 2439 }, { "epoch": 0.23746958637469587, "grad_norm": 1.5003582208774642, "learning_rate": 8.913645771149058e-06, "loss": 0.342, "step": 2440 }, { "epoch": 0.2375669099756691, "grad_norm": 1.2703591332316027, "learning_rate": 8.91266463680296e-06, "loss": 0.3195, "step": 2441 }, { "epoch": 0.23766423357664235, "grad_norm": 1.3910967851640175, "learning_rate": 8.91168311365297e-06, "loss": 0.334, "step": 2442 }, { "epoch": 0.23776155717761557, "grad_norm": 1.5001053773105038, "learning_rate": 8.910701201796625e-06, "loss": 0.4665, "step": 2443 }, { "epoch": 0.2378588807785888, "grad_norm": 1.6142849143926903, "learning_rate": 8.9097189013315e-06, "loss": 0.5276, "step": 2444 }, { "epoch": 0.23795620437956205, "grad_norm": 1.2059866820401877, "learning_rate": 8.908736212355202e-06, "loss": 0.2936, "step": 2445 }, { "epoch": 0.23805352798053528, "grad_norm": 1.4496663268694052, "learning_rate": 8.907753134965387e-06, "loss": 0.475, "step": 2446 }, { "epoch": 0.2381508515815085, "grad_norm": 1.4184456855989886, "learning_rate": 8.90676966925974e-06, "loss": 0.4477, "step": 2447 }, { "epoch": 0.23824817518248176, "grad_norm": 1.7126804340284862, "learning_rate": 8.90578581533599e-06, "loss": 0.6392, "step": 2448 }, { "epoch": 0.23834549878345498, "grad_norm": 1.6085356958926766, "learning_rate": 8.904801573291901e-06, "loss": 0.4428, "step": 2449 }, { "epoch": 0.2384428223844282, "grad_norm": 1.1724096477321129, "learning_rate": 8.903816943225281e-06, "loss": 0.23, "step": 2450 }, { "epoch": 0.23854014598540146, "grad_norm": 1.2849397331978023, "learning_rate": 8.902831925233972e-06, "loss": 0.4315, "step": 2451 }, { "epoch": 0.2386374695863747, "grad_norm": 1.3479724015628292, "learning_rate": 8.901846519415856e-06, "loss": 0.4528, "step": 2452 }, { "epoch": 0.23873479318734794, "grad_norm": 1.5241447958707557, "learning_rate": 8.900860725868852e-06, "loss": 0.5638, "step": 2453 }, { "epoch": 0.23883211678832117, "grad_norm": 1.3008951589753057, "learning_rate": 8.899874544690921e-06, "loss": 0.4364, "step": 2454 }, { "epoch": 0.2389294403892944, "grad_norm": 1.3889516127516133, "learning_rate": 8.89888797598006e-06, "loss": 0.5968, "step": 2455 }, { "epoch": 0.23902676399026765, "grad_norm": 1.3382384356293548, "learning_rate": 8.8979010198343e-06, "loss": 0.3423, "step": 2456 }, { "epoch": 0.23912408759124087, "grad_norm": 1.3927455122024084, "learning_rate": 8.896913676351726e-06, "loss": 0.5291, "step": 2457 }, { "epoch": 0.2392214111922141, "grad_norm": 1.3654704619725508, "learning_rate": 8.895925945630441e-06, "loss": 0.3224, "step": 2458 }, { "epoch": 0.23931873479318735, "grad_norm": 2.420859240745107, "learning_rate": 8.8949378277686e-06, "loss": 0.4526, "step": 2459 }, { "epoch": 0.23941605839416058, "grad_norm": 1.279171164356654, "learning_rate": 8.893949322864394e-06, "loss": 0.3452, "step": 2460 }, { "epoch": 0.2395133819951338, "grad_norm": 1.4336845514712926, "learning_rate": 8.89296043101605e-06, "loss": 0.3891, "step": 2461 }, { "epoch": 0.23961070559610706, "grad_norm": 1.2391493008048138, "learning_rate": 8.891971152321836e-06, "loss": 0.5135, "step": 2462 }, { "epoch": 0.23970802919708029, "grad_norm": 1.2398633987802397, "learning_rate": 8.890981486880057e-06, "loss": 0.2688, "step": 2463 }, { "epoch": 0.23980535279805354, "grad_norm": 1.1975725536626207, "learning_rate": 8.889991434789054e-06, "loss": 0.4181, "step": 2464 }, { "epoch": 0.23990267639902677, "grad_norm": 1.5121790458693565, "learning_rate": 8.889000996147213e-06, "loss": 0.667, "step": 2465 }, { "epoch": 0.24, "grad_norm": 1.2980809407294283, "learning_rate": 8.888010171052951e-06, "loss": 0.4025, "step": 2466 }, { "epoch": 0.24009732360097324, "grad_norm": 1.3683247659037883, "learning_rate": 8.887018959604731e-06, "loss": 0.4195, "step": 2467 }, { "epoch": 0.24019464720194647, "grad_norm": 1.6392091000056277, "learning_rate": 8.886027361901045e-06, "loss": 0.4464, "step": 2468 }, { "epoch": 0.2402919708029197, "grad_norm": 1.4286158146093557, "learning_rate": 8.885035378040435e-06, "loss": 0.503, "step": 2469 }, { "epoch": 0.24038929440389295, "grad_norm": 1.6249203295617591, "learning_rate": 8.884043008121468e-06, "loss": 0.5875, "step": 2470 }, { "epoch": 0.24048661800486618, "grad_norm": 1.316531393288964, "learning_rate": 8.883050252242762e-06, "loss": 0.3225, "step": 2471 }, { "epoch": 0.24058394160583943, "grad_norm": 1.3738066957140371, "learning_rate": 8.882057110502964e-06, "loss": 0.3863, "step": 2472 }, { "epoch": 0.24068126520681266, "grad_norm": 1.6149562610100578, "learning_rate": 8.881063583000766e-06, "loss": 0.6899, "step": 2473 }, { "epoch": 0.24077858880778588, "grad_norm": 1.1978996054498634, "learning_rate": 8.880069669834895e-06, "loss": 0.4647, "step": 2474 }, { "epoch": 0.24087591240875914, "grad_norm": 1.3737195294986575, "learning_rate": 8.879075371104114e-06, "loss": 0.3404, "step": 2475 }, { "epoch": 0.24097323600973236, "grad_norm": 1.3242090275500389, "learning_rate": 8.878080686907231e-06, "loss": 0.4923, "step": 2476 }, { "epoch": 0.2410705596107056, "grad_norm": 1.295191211796917, "learning_rate": 8.877085617343085e-06, "loss": 0.4449, "step": 2477 }, { "epoch": 0.24116788321167884, "grad_norm": 1.5068542914468723, "learning_rate": 8.87609016251056e-06, "loss": 0.5506, "step": 2478 }, { "epoch": 0.24126520681265207, "grad_norm": 1.650040845654398, "learning_rate": 8.87509432250857e-06, "loss": 0.5715, "step": 2479 }, { "epoch": 0.2413625304136253, "grad_norm": 1.5289429392674028, "learning_rate": 8.874098097436078e-06, "loss": 0.5626, "step": 2480 }, { "epoch": 0.24145985401459855, "grad_norm": 1.3609358059405043, "learning_rate": 8.873101487392078e-06, "loss": 0.4096, "step": 2481 }, { "epoch": 0.24155717761557177, "grad_norm": 1.5725676631470524, "learning_rate": 8.8721044924756e-06, "loss": 0.6597, "step": 2482 }, { "epoch": 0.24165450121654503, "grad_norm": 1.094002939677081, "learning_rate": 8.87110711278572e-06, "loss": 0.3206, "step": 2483 }, { "epoch": 0.24175182481751825, "grad_norm": 1.4551979783640236, "learning_rate": 8.870109348421544e-06, "loss": 0.445, "step": 2484 }, { "epoch": 0.24184914841849148, "grad_norm": 1.534219781362636, "learning_rate": 8.869111199482227e-06, "loss": 0.6666, "step": 2485 }, { "epoch": 0.24194647201946473, "grad_norm": 0.9530847884904149, "learning_rate": 8.86811266606695e-06, "loss": 0.2756, "step": 2486 }, { "epoch": 0.24204379562043796, "grad_norm": 1.4859819247146357, "learning_rate": 8.86711374827494e-06, "loss": 0.4626, "step": 2487 }, { "epoch": 0.24214111922141118, "grad_norm": 1.5336983239407425, "learning_rate": 8.86611444620546e-06, "loss": 0.5383, "step": 2488 }, { "epoch": 0.24223844282238444, "grad_norm": 1.4073640437212571, "learning_rate": 8.865114759957812e-06, "loss": 0.4675, "step": 2489 }, { "epoch": 0.24233576642335766, "grad_norm": 1.562895534043348, "learning_rate": 8.864114689631334e-06, "loss": 0.5641, "step": 2490 }, { "epoch": 0.2424330900243309, "grad_norm": 1.643145414496213, "learning_rate": 8.863114235325405e-06, "loss": 0.5749, "step": 2491 }, { "epoch": 0.24253041362530414, "grad_norm": 1.226721686463078, "learning_rate": 8.862113397139437e-06, "loss": 0.3432, "step": 2492 }, { "epoch": 0.24262773722627737, "grad_norm": 1.2699959241996903, "learning_rate": 8.86111217517289e-06, "loss": 0.4203, "step": 2493 }, { "epoch": 0.24272506082725062, "grad_norm": 1.4233705808484327, "learning_rate": 8.860110569525253e-06, "loss": 0.2601, "step": 2494 }, { "epoch": 0.24282238442822385, "grad_norm": 1.3784035260656315, "learning_rate": 8.859108580296055e-06, "loss": 0.4973, "step": 2495 }, { "epoch": 0.24291970802919707, "grad_norm": 1.2790024746236357, "learning_rate": 8.858106207584864e-06, "loss": 0.4067, "step": 2496 }, { "epoch": 0.24301703163017033, "grad_norm": 1.4041054798155945, "learning_rate": 8.857103451491292e-06, "loss": 0.5228, "step": 2497 }, { "epoch": 0.24311435523114355, "grad_norm": 1.6788565066048042, "learning_rate": 8.856100312114975e-06, "loss": 0.7133, "step": 2498 }, { "epoch": 0.24321167883211678, "grad_norm": 1.2024623978380433, "learning_rate": 8.855096789555602e-06, "loss": 0.2507, "step": 2499 }, { "epoch": 0.24330900243309003, "grad_norm": 1.3828470689148782, "learning_rate": 8.85409288391289e-06, "loss": 0.3993, "step": 2500 }, { "epoch": 0.24340632603406326, "grad_norm": 1.427484284296059, "learning_rate": 8.8530885952866e-06, "loss": 0.3926, "step": 2501 }, { "epoch": 0.24350364963503648, "grad_norm": 1.3193446567792235, "learning_rate": 8.852083923776529e-06, "loss": 0.2152, "step": 2502 }, { "epoch": 0.24360097323600974, "grad_norm": 1.3297823718570532, "learning_rate": 8.851078869482509e-06, "loss": 0.4772, "step": 2503 }, { "epoch": 0.24369829683698296, "grad_norm": 1.3044660003313646, "learning_rate": 8.850073432504416e-06, "loss": 0.3589, "step": 2504 }, { "epoch": 0.24379562043795622, "grad_norm": 1.4488096250914715, "learning_rate": 8.84906761294216e-06, "loss": 0.3261, "step": 2505 }, { "epoch": 0.24389294403892944, "grad_norm": 1.2778329641523152, "learning_rate": 8.848061410895687e-06, "loss": 0.3047, "step": 2506 }, { "epoch": 0.24399026763990267, "grad_norm": 1.135638375757245, "learning_rate": 8.847054826464988e-06, "loss": 0.3173, "step": 2507 }, { "epoch": 0.24408759124087592, "grad_norm": 1.5033745953013864, "learning_rate": 8.846047859750086e-06, "loss": 0.4813, "step": 2508 }, { "epoch": 0.24418491484184915, "grad_norm": 1.1189501535394493, "learning_rate": 8.845040510851044e-06, "loss": 0.3359, "step": 2509 }, { "epoch": 0.24428223844282237, "grad_norm": 1.4743455663494507, "learning_rate": 8.844032779867966e-06, "loss": 0.5354, "step": 2510 }, { "epoch": 0.24437956204379563, "grad_norm": 1.2644405709657818, "learning_rate": 8.843024666900983e-06, "loss": 0.4019, "step": 2511 }, { "epoch": 0.24447688564476885, "grad_norm": 1.5585250648144962, "learning_rate": 8.84201617205028e-06, "loss": 0.4977, "step": 2512 }, { "epoch": 0.24457420924574208, "grad_norm": 1.5187811483320863, "learning_rate": 8.841007295416069e-06, "loss": 0.6282, "step": 2513 }, { "epoch": 0.24467153284671533, "grad_norm": 1.461783750506842, "learning_rate": 8.839998037098601e-06, "loss": 0.6085, "step": 2514 }, { "epoch": 0.24476885644768856, "grad_norm": 1.4235036556142022, "learning_rate": 8.838988397198167e-06, "loss": 0.5696, "step": 2515 }, { "epoch": 0.2448661800486618, "grad_norm": 1.6731038078758624, "learning_rate": 8.837978375815097e-06, "loss": 0.5026, "step": 2516 }, { "epoch": 0.24496350364963504, "grad_norm": 1.2803102163564937, "learning_rate": 8.836967973049757e-06, "loss": 0.2605, "step": 2517 }, { "epoch": 0.24506082725060827, "grad_norm": 1.2869808613318177, "learning_rate": 8.835957189002551e-06, "loss": 0.3073, "step": 2518 }, { "epoch": 0.24515815085158152, "grad_norm": 1.4129342483481067, "learning_rate": 8.834946023773921e-06, "loss": 0.334, "step": 2519 }, { "epoch": 0.24525547445255474, "grad_norm": 1.6342111830003216, "learning_rate": 8.833934477464348e-06, "loss": 0.6127, "step": 2520 }, { "epoch": 0.24535279805352797, "grad_norm": 1.6465764681762454, "learning_rate": 8.83292255017435e-06, "loss": 0.6432, "step": 2521 }, { "epoch": 0.24545012165450122, "grad_norm": 1.4262158711234114, "learning_rate": 8.83191024200448e-06, "loss": 0.5224, "step": 2522 }, { "epoch": 0.24554744525547445, "grad_norm": 1.593193256147642, "learning_rate": 8.830897553055337e-06, "loss": 0.5211, "step": 2523 }, { "epoch": 0.24564476885644768, "grad_norm": 1.624031218270973, "learning_rate": 8.829884483427547e-06, "loss": 0.5128, "step": 2524 }, { "epoch": 0.24574209245742093, "grad_norm": 1.991662408778961, "learning_rate": 8.828871033221783e-06, "loss": 0.3025, "step": 2525 }, { "epoch": 0.24583941605839416, "grad_norm": 1.4390691402915812, "learning_rate": 8.82785720253875e-06, "loss": 0.5088, "step": 2526 }, { "epoch": 0.2459367396593674, "grad_norm": 1.4179406701872763, "learning_rate": 8.826842991479197e-06, "loss": 0.3887, "step": 2527 }, { "epoch": 0.24603406326034064, "grad_norm": 1.460230365502962, "learning_rate": 8.825828400143902e-06, "loss": 0.3316, "step": 2528 }, { "epoch": 0.24613138686131386, "grad_norm": 1.4924241123043909, "learning_rate": 8.824813428633685e-06, "loss": 0.4989, "step": 2529 }, { "epoch": 0.24622871046228711, "grad_norm": 1.593556186634644, "learning_rate": 8.82379807704941e-06, "loss": 0.525, "step": 2530 }, { "epoch": 0.24632603406326034, "grad_norm": 1.6809566227650843, "learning_rate": 8.822782345491968e-06, "loss": 0.3421, "step": 2531 }, { "epoch": 0.24642335766423357, "grad_norm": 1.4773288736144092, "learning_rate": 8.821766234062294e-06, "loss": 0.534, "step": 2532 }, { "epoch": 0.24652068126520682, "grad_norm": 1.4001059355846526, "learning_rate": 8.820749742861363e-06, "loss": 0.3887, "step": 2533 }, { "epoch": 0.24661800486618005, "grad_norm": 1.349012582441713, "learning_rate": 8.81973287199018e-06, "loss": 0.2852, "step": 2534 }, { "epoch": 0.24671532846715327, "grad_norm": 1.651550318908522, "learning_rate": 8.818715621549794e-06, "loss": 0.4967, "step": 2535 }, { "epoch": 0.24681265206812653, "grad_norm": 1.5932669562049986, "learning_rate": 8.817697991641289e-06, "loss": 0.4173, "step": 2536 }, { "epoch": 0.24690997566909975, "grad_norm": 1.3550488264007063, "learning_rate": 8.816679982365787e-06, "loss": 0.3404, "step": 2537 }, { "epoch": 0.247007299270073, "grad_norm": 1.571341106532058, "learning_rate": 8.815661593824451e-06, "loss": 0.5666, "step": 2538 }, { "epoch": 0.24710462287104623, "grad_norm": 1.5685299297246114, "learning_rate": 8.814642826118477e-06, "loss": 0.4521, "step": 2539 }, { "epoch": 0.24720194647201946, "grad_norm": 1.5355691524375334, "learning_rate": 8.8136236793491e-06, "loss": 0.3452, "step": 2540 }, { "epoch": 0.2472992700729927, "grad_norm": 1.4490992247448509, "learning_rate": 8.812604153617594e-06, "loss": 0.3046, "step": 2541 }, { "epoch": 0.24739659367396594, "grad_norm": 1.5790493967738255, "learning_rate": 8.81158424902527e-06, "loss": 0.5957, "step": 2542 }, { "epoch": 0.24749391727493916, "grad_norm": 1.8299083651337236, "learning_rate": 8.810563965673478e-06, "loss": 0.529, "step": 2543 }, { "epoch": 0.24759124087591242, "grad_norm": 1.336357630649535, "learning_rate": 8.8095433036636e-06, "loss": 0.2498, "step": 2544 }, { "epoch": 0.24768856447688564, "grad_norm": 3.272954864246679, "learning_rate": 8.808522263097065e-06, "loss": 0.3439, "step": 2545 }, { "epoch": 0.24778588807785887, "grad_norm": 1.5948700054852, "learning_rate": 8.80750084407533e-06, "loss": 0.5754, "step": 2546 }, { "epoch": 0.24788321167883212, "grad_norm": 1.2457293034288246, "learning_rate": 8.806479046699896e-06, "loss": 0.3355, "step": 2547 }, { "epoch": 0.24798053527980535, "grad_norm": 1.4118835775208534, "learning_rate": 8.8054568710723e-06, "loss": 0.4843, "step": 2548 }, { "epoch": 0.2480778588807786, "grad_norm": 2.0167817337794745, "learning_rate": 8.804434317294115e-06, "loss": 0.4781, "step": 2549 }, { "epoch": 0.24817518248175183, "grad_norm": 1.630746510877536, "learning_rate": 8.803411385466954e-06, "loss": 0.5226, "step": 2550 }, { "epoch": 0.24827250608272505, "grad_norm": 1.0942598516950242, "learning_rate": 8.802388075692465e-06, "loss": 0.1843, "step": 2551 }, { "epoch": 0.2483698296836983, "grad_norm": 1.8060042956650721, "learning_rate": 8.801364388072336e-06, "loss": 0.705, "step": 2552 }, { "epoch": 0.24846715328467153, "grad_norm": 1.632331667833736, "learning_rate": 8.800340322708291e-06, "loss": 0.4964, "step": 2553 }, { "epoch": 0.24856447688564476, "grad_norm": 1.539098206701319, "learning_rate": 8.799315879702095e-06, "loss": 0.3962, "step": 2554 }, { "epoch": 0.248661800486618, "grad_norm": 1.2219114137184675, "learning_rate": 8.798291059155543e-06, "loss": 0.2497, "step": 2555 }, { "epoch": 0.24875912408759124, "grad_norm": 1.4540964796439875, "learning_rate": 8.797265861170471e-06, "loss": 0.5159, "step": 2556 }, { "epoch": 0.24885644768856446, "grad_norm": 1.554150512584087, "learning_rate": 8.796240285848761e-06, "loss": 0.4412, "step": 2557 }, { "epoch": 0.24895377128953772, "grad_norm": 1.7004545782091594, "learning_rate": 8.795214333292318e-06, "loss": 0.5179, "step": 2558 }, { "epoch": 0.24905109489051094, "grad_norm": 1.726524110945535, "learning_rate": 8.794188003603095e-06, "loss": 0.4071, "step": 2559 }, { "epoch": 0.2491484184914842, "grad_norm": 1.27126477948415, "learning_rate": 8.793161296883077e-06, "loss": 0.2268, "step": 2560 }, { "epoch": 0.24924574209245742, "grad_norm": 1.9752049062158858, "learning_rate": 8.79213421323429e-06, "loss": 0.3632, "step": 2561 }, { "epoch": 0.24934306569343065, "grad_norm": 1.0556825817929254, "learning_rate": 8.791106752758796e-06, "loss": 0.3627, "step": 2562 }, { "epoch": 0.2494403892944039, "grad_norm": 1.6452772754401714, "learning_rate": 8.790078915558693e-06, "loss": 0.6043, "step": 2563 }, { "epoch": 0.24953771289537713, "grad_norm": 1.278547180886592, "learning_rate": 8.789050701736117e-06, "loss": 0.3768, "step": 2564 }, { "epoch": 0.24963503649635035, "grad_norm": 1.3443028399521961, "learning_rate": 8.788022111393247e-06, "loss": 0.3856, "step": 2565 }, { "epoch": 0.2497323600973236, "grad_norm": 1.2774166354695482, "learning_rate": 8.78699314463229e-06, "loss": 0.4391, "step": 2566 }, { "epoch": 0.24982968369829683, "grad_norm": 1.2231715277397497, "learning_rate": 8.785963801555497e-06, "loss": 0.4128, "step": 2567 }, { "epoch": 0.24992700729927006, "grad_norm": 1.4012153782510572, "learning_rate": 8.784934082265154e-06, "loss": 0.4683, "step": 2568 }, { "epoch": 0.2500243309002433, "grad_norm": 1.1954060436870173, "learning_rate": 8.783903986863583e-06, "loss": 0.2786, "step": 2569 }, { "epoch": 0.25012165450121654, "grad_norm": 1.7116998515615807, "learning_rate": 8.782873515453148e-06, "loss": 0.6004, "step": 2570 }, { "epoch": 0.2502189781021898, "grad_norm": 1.5712719922889962, "learning_rate": 8.781842668136247e-06, "loss": 0.6172, "step": 2571 }, { "epoch": 0.250316301703163, "grad_norm": 1.246915874910697, "learning_rate": 8.780811445015316e-06, "loss": 0.4335, "step": 2572 }, { "epoch": 0.25041362530413624, "grad_norm": 1.341456518636559, "learning_rate": 8.779779846192827e-06, "loss": 0.4187, "step": 2573 }, { "epoch": 0.2505109489051095, "grad_norm": 1.1323562755710477, "learning_rate": 8.778747871771293e-06, "loss": 0.2832, "step": 2574 }, { "epoch": 0.25060827250608275, "grad_norm": 1.4401083791532063, "learning_rate": 8.777715521853258e-06, "loss": 0.3779, "step": 2575 }, { "epoch": 0.25070559610705595, "grad_norm": 1.4784987737181619, "learning_rate": 8.77668279654131e-06, "loss": 0.3129, "step": 2576 }, { "epoch": 0.2508029197080292, "grad_norm": 1.1394717513462493, "learning_rate": 8.775649695938074e-06, "loss": 0.3162, "step": 2577 }, { "epoch": 0.25090024330900246, "grad_norm": 1.4625556674372375, "learning_rate": 8.774616220146204e-06, "loss": 0.4605, "step": 2578 }, { "epoch": 0.25099756690997566, "grad_norm": 3.1521808341091875, "learning_rate": 8.773582369268402e-06, "loss": 0.3485, "step": 2579 }, { "epoch": 0.2510948905109489, "grad_norm": 1.3578124438111323, "learning_rate": 8.7725481434074e-06, "loss": 0.4693, "step": 2580 }, { "epoch": 0.25119221411192216, "grad_norm": 1.63411664215404, "learning_rate": 8.771513542665969e-06, "loss": 0.4956, "step": 2581 }, { "epoch": 0.25128953771289536, "grad_norm": 1.5098765580454843, "learning_rate": 8.77047856714692e-06, "loss": 0.4657, "step": 2582 }, { "epoch": 0.2513868613138686, "grad_norm": 1.2801786921613054, "learning_rate": 8.7694432169531e-06, "loss": 0.3369, "step": 2583 }, { "epoch": 0.25148418491484187, "grad_norm": 1.4360422953324754, "learning_rate": 8.768407492187388e-06, "loss": 0.4907, "step": 2584 }, { "epoch": 0.25158150851581507, "grad_norm": 1.4560406874169747, "learning_rate": 8.767371392952708e-06, "loss": 0.3157, "step": 2585 }, { "epoch": 0.2516788321167883, "grad_norm": 1.934211832538441, "learning_rate": 8.766334919352018e-06, "loss": 0.7151, "step": 2586 }, { "epoch": 0.2517761557177616, "grad_norm": 1.6767044903158872, "learning_rate": 8.76529807148831e-06, "loss": 0.331, "step": 2587 }, { "epoch": 0.25187347931873477, "grad_norm": 1.4698852047894042, "learning_rate": 8.76426084946462e-06, "loss": 0.3951, "step": 2588 }, { "epoch": 0.251970802919708, "grad_norm": 1.3539539414605721, "learning_rate": 8.763223253384015e-06, "loss": 0.4011, "step": 2589 }, { "epoch": 0.2520681265206813, "grad_norm": 1.506242240790805, "learning_rate": 8.762185283349603e-06, "loss": 0.5274, "step": 2590 }, { "epoch": 0.2521654501216545, "grad_norm": 1.3140936667503142, "learning_rate": 8.761146939464527e-06, "loss": 0.3198, "step": 2591 }, { "epoch": 0.25226277372262773, "grad_norm": 1.1404767919952752, "learning_rate": 8.760108221831967e-06, "loss": 0.4013, "step": 2592 }, { "epoch": 0.252360097323601, "grad_norm": 1.4693477307137552, "learning_rate": 8.759069130555142e-06, "loss": 0.4783, "step": 2593 }, { "epoch": 0.2524574209245742, "grad_norm": 1.3352582665983712, "learning_rate": 8.75802966573731e-06, "loss": 0.4617, "step": 2594 }, { "epoch": 0.25255474452554744, "grad_norm": 1.2824428866870197, "learning_rate": 8.756989827481756e-06, "loss": 0.3352, "step": 2595 }, { "epoch": 0.2526520681265207, "grad_norm": 1.4774059328965283, "learning_rate": 8.755949615891814e-06, "loss": 0.4635, "step": 2596 }, { "epoch": 0.25274939172749394, "grad_norm": 1.6875827910282526, "learning_rate": 8.754909031070852e-06, "loss": 0.6222, "step": 2597 }, { "epoch": 0.25284671532846714, "grad_norm": 1.2063205441417741, "learning_rate": 8.75386807312227e-06, "loss": 0.2455, "step": 2598 }, { "epoch": 0.2529440389294404, "grad_norm": 1.3021547323360578, "learning_rate": 8.752826742149512e-06, "loss": 0.4329, "step": 2599 }, { "epoch": 0.25304136253041365, "grad_norm": 1.1835878076183852, "learning_rate": 8.751785038256054e-06, "loss": 0.3662, "step": 2600 }, { "epoch": 0.25313868613138685, "grad_norm": 1.544717999496196, "learning_rate": 8.750742961545409e-06, "loss": 0.3971, "step": 2601 }, { "epoch": 0.2532360097323601, "grad_norm": 1.3629505007649398, "learning_rate": 8.749700512121131e-06, "loss": 0.5107, "step": 2602 }, { "epoch": 0.25333333333333335, "grad_norm": 1.5737001686599814, "learning_rate": 8.74865769008681e-06, "loss": 0.5279, "step": 2603 }, { "epoch": 0.25343065693430655, "grad_norm": 1.4784815997261378, "learning_rate": 8.747614495546069e-06, "loss": 0.4792, "step": 2604 }, { "epoch": 0.2535279805352798, "grad_norm": 1.3236076722973804, "learning_rate": 8.74657092860257e-06, "loss": 0.3975, "step": 2605 }, { "epoch": 0.25362530413625306, "grad_norm": 1.0968595172191475, "learning_rate": 8.745526989360018e-06, "loss": 0.269, "step": 2606 }, { "epoch": 0.25372262773722626, "grad_norm": 1.562505340567045, "learning_rate": 8.744482677922147e-06, "loss": 0.5157, "step": 2607 }, { "epoch": 0.2538199513381995, "grad_norm": 1.656826278908397, "learning_rate": 8.743437994392729e-06, "loss": 0.4867, "step": 2608 }, { "epoch": 0.25391727493917277, "grad_norm": 1.3948672448161548, "learning_rate": 8.742392938875577e-06, "loss": 0.5279, "step": 2609 }, { "epoch": 0.25401459854014596, "grad_norm": 1.5892338813179163, "learning_rate": 8.741347511474539e-06, "loss": 0.5611, "step": 2610 }, { "epoch": 0.2541119221411192, "grad_norm": 1.6074249897923194, "learning_rate": 8.740301712293498e-06, "loss": 0.351, "step": 2611 }, { "epoch": 0.25420924574209247, "grad_norm": 1.6505540033315536, "learning_rate": 8.739255541436379e-06, "loss": 0.5747, "step": 2612 }, { "epoch": 0.25430656934306567, "grad_norm": 1.3314247428577628, "learning_rate": 8.738208999007137e-06, "loss": 0.3779, "step": 2613 }, { "epoch": 0.2544038929440389, "grad_norm": 1.2796270745139389, "learning_rate": 8.737162085109768e-06, "loss": 0.3557, "step": 2614 }, { "epoch": 0.2545012165450122, "grad_norm": 1.602637102567401, "learning_rate": 8.736114799848307e-06, "loss": 0.2882, "step": 2615 }, { "epoch": 0.2545985401459854, "grad_norm": 1.4207119219562419, "learning_rate": 8.735067143326821e-06, "loss": 0.3881, "step": 2616 }, { "epoch": 0.25469586374695863, "grad_norm": 1.4305110706379638, "learning_rate": 8.73401911564942e-06, "loss": 0.3486, "step": 2617 }, { "epoch": 0.2547931873479319, "grad_norm": 1.434428707272536, "learning_rate": 8.732970716920242e-06, "loss": 0.3169, "step": 2618 }, { "epoch": 0.25489051094890514, "grad_norm": 1.3228470441064362, "learning_rate": 8.73192194724347e-06, "loss": 0.4485, "step": 2619 }, { "epoch": 0.25498783454987833, "grad_norm": 1.3897030806906485, "learning_rate": 8.730872806723318e-06, "loss": 0.4172, "step": 2620 }, { "epoch": 0.2550851581508516, "grad_norm": 1.3840681937318722, "learning_rate": 8.729823295464045e-06, "loss": 0.251, "step": 2621 }, { "epoch": 0.25518248175182484, "grad_norm": 1.775278354364079, "learning_rate": 8.728773413569938e-06, "loss": 0.4811, "step": 2622 }, { "epoch": 0.25527980535279804, "grad_norm": 1.2701408917829737, "learning_rate": 8.727723161145325e-06, "loss": 0.2827, "step": 2623 }, { "epoch": 0.2553771289537713, "grad_norm": 1.5528362504659363, "learning_rate": 8.72667253829457e-06, "loss": 0.5084, "step": 2624 }, { "epoch": 0.25547445255474455, "grad_norm": 1.3793988523162408, "learning_rate": 8.725621545122074e-06, "loss": 0.3979, "step": 2625 }, { "epoch": 0.25557177615571774, "grad_norm": 1.70282889775673, "learning_rate": 8.724570181732275e-06, "loss": 0.5983, "step": 2626 }, { "epoch": 0.255669099756691, "grad_norm": 1.28105292316495, "learning_rate": 8.723518448229649e-06, "loss": 0.4756, "step": 2627 }, { "epoch": 0.25576642335766425, "grad_norm": 1.3826686116158597, "learning_rate": 8.722466344718705e-06, "loss": 0.2978, "step": 2628 }, { "epoch": 0.25586374695863745, "grad_norm": 1.460242284502631, "learning_rate": 8.721413871303992e-06, "loss": 0.4036, "step": 2629 }, { "epoch": 0.2559610705596107, "grad_norm": 1.4181157816170762, "learning_rate": 8.720361028090095e-06, "loss": 0.4224, "step": 2630 }, { "epoch": 0.25605839416058396, "grad_norm": 1.7898330028782403, "learning_rate": 8.719307815181638e-06, "loss": 0.7314, "step": 2631 }, { "epoch": 0.25615571776155716, "grad_norm": 1.6886124652733636, "learning_rate": 8.718254232683276e-06, "loss": 0.3513, "step": 2632 }, { "epoch": 0.2562530413625304, "grad_norm": 1.2562027575971086, "learning_rate": 8.717200280699705e-06, "loss": 0.284, "step": 2633 }, { "epoch": 0.25635036496350366, "grad_norm": 1.4899596514775177, "learning_rate": 8.716145959335658e-06, "loss": 0.2778, "step": 2634 }, { "epoch": 0.25644768856447686, "grad_norm": 1.1699021581347986, "learning_rate": 8.715091268695903e-06, "loss": 0.3163, "step": 2635 }, { "epoch": 0.2565450121654501, "grad_norm": 1.020653527182934, "learning_rate": 8.714036208885243e-06, "loss": 0.2191, "step": 2636 }, { "epoch": 0.25664233576642337, "grad_norm": 1.5373942827305265, "learning_rate": 8.712980780008526e-06, "loss": 0.4183, "step": 2637 }, { "epoch": 0.25673965936739657, "grad_norm": 1.1268355971062876, "learning_rate": 8.711924982170623e-06, "loss": 0.2851, "step": 2638 }, { "epoch": 0.2568369829683698, "grad_norm": 1.25228244300652, "learning_rate": 8.710868815476456e-06, "loss": 0.1963, "step": 2639 }, { "epoch": 0.2569343065693431, "grad_norm": 1.3905442460862172, "learning_rate": 8.709812280030971e-06, "loss": 0.3648, "step": 2640 }, { "epoch": 0.2570316301703163, "grad_norm": 1.5078176389616522, "learning_rate": 8.708755375939162e-06, "loss": 0.4131, "step": 2641 }, { "epoch": 0.2571289537712895, "grad_norm": 1.4441200079463874, "learning_rate": 8.70769810330605e-06, "loss": 0.4047, "step": 2642 }, { "epoch": 0.2572262773722628, "grad_norm": 1.3883503516178042, "learning_rate": 8.7066404622367e-06, "loss": 0.3308, "step": 2643 }, { "epoch": 0.25732360097323603, "grad_norm": 1.7851696055640995, "learning_rate": 8.705582452836208e-06, "loss": 0.336, "step": 2644 }, { "epoch": 0.25742092457420923, "grad_norm": 1.309628752016819, "learning_rate": 8.70452407520971e-06, "loss": 0.3462, "step": 2645 }, { "epoch": 0.2575182481751825, "grad_norm": 1.3618437175125289, "learning_rate": 8.703465329462379e-06, "loss": 0.3047, "step": 2646 }, { "epoch": 0.25761557177615574, "grad_norm": 1.5821297320572192, "learning_rate": 8.702406215699421e-06, "loss": 0.2318, "step": 2647 }, { "epoch": 0.25771289537712894, "grad_norm": 1.4729014225467234, "learning_rate": 8.701346734026082e-06, "loss": 0.3147, "step": 2648 }, { "epoch": 0.2578102189781022, "grad_norm": 1.6287249640343295, "learning_rate": 8.700286884547642e-06, "loss": 0.5808, "step": 2649 }, { "epoch": 0.25790754257907544, "grad_norm": 1.2824109098190504, "learning_rate": 8.69922666736942e-06, "loss": 0.3836, "step": 2650 }, { "epoch": 0.25800486618004864, "grad_norm": 1.5096397594183033, "learning_rate": 8.69816608259677e-06, "loss": 0.3804, "step": 2651 }, { "epoch": 0.2581021897810219, "grad_norm": 1.7247008216261863, "learning_rate": 8.697105130335084e-06, "loss": 0.3378, "step": 2652 }, { "epoch": 0.25819951338199515, "grad_norm": 1.5872130127065738, "learning_rate": 8.69604381068979e-06, "loss": 0.4369, "step": 2653 }, { "epoch": 0.25829683698296835, "grad_norm": 1.5909295650502344, "learning_rate": 8.694982123766348e-06, "loss": 0.3554, "step": 2654 }, { "epoch": 0.2583941605839416, "grad_norm": 1.7135035115393307, "learning_rate": 8.693920069670265e-06, "loss": 0.4869, "step": 2655 }, { "epoch": 0.25849148418491485, "grad_norm": 1.3366492087792976, "learning_rate": 8.692857648507071e-06, "loss": 0.3102, "step": 2656 }, { "epoch": 0.25858880778588805, "grad_norm": 1.2478048122674565, "learning_rate": 8.691794860382345e-06, "loss": 0.3722, "step": 2657 }, { "epoch": 0.2586861313868613, "grad_norm": 1.5080776475601503, "learning_rate": 8.690731705401694e-06, "loss": 0.316, "step": 2658 }, { "epoch": 0.25878345498783456, "grad_norm": 1.443811575497146, "learning_rate": 8.689668183670763e-06, "loss": 0.2875, "step": 2659 }, { "epoch": 0.25888077858880776, "grad_norm": 1.7036441396737687, "learning_rate": 8.688604295295238e-06, "loss": 0.4025, "step": 2660 }, { "epoch": 0.258978102189781, "grad_norm": 1.4234806259439374, "learning_rate": 8.687540040380838e-06, "loss": 0.4452, "step": 2661 }, { "epoch": 0.25907542579075427, "grad_norm": 1.2741393980838642, "learning_rate": 8.686475419033315e-06, "loss": 0.2237, "step": 2662 }, { "epoch": 0.2591727493917275, "grad_norm": 1.1826384563722763, "learning_rate": 8.685410431358464e-06, "loss": 0.3398, "step": 2663 }, { "epoch": 0.2592700729927007, "grad_norm": 1.5757741509023746, "learning_rate": 8.684345077462117e-06, "loss": 0.3846, "step": 2664 }, { "epoch": 0.25936739659367397, "grad_norm": 1.475707275733763, "learning_rate": 8.683279357450131e-06, "loss": 0.2804, "step": 2665 }, { "epoch": 0.2594647201946472, "grad_norm": 1.4241797244636094, "learning_rate": 8.682213271428415e-06, "loss": 0.2553, "step": 2666 }, { "epoch": 0.2595620437956204, "grad_norm": 1.1548194283365685, "learning_rate": 8.6811468195029e-06, "loss": 0.3118, "step": 2667 }, { "epoch": 0.2596593673965937, "grad_norm": 1.5918458521510486, "learning_rate": 8.680080001779564e-06, "loss": 0.4525, "step": 2668 }, { "epoch": 0.25975669099756693, "grad_norm": 1.5508802560099362, "learning_rate": 8.679012818364416e-06, "loss": 0.4163, "step": 2669 }, { "epoch": 0.25985401459854013, "grad_norm": 2.4434630008376232, "learning_rate": 8.677945269363504e-06, "loss": 0.4372, "step": 2670 }, { "epoch": 0.2599513381995134, "grad_norm": 1.5324792404386718, "learning_rate": 8.676877354882907e-06, "loss": 0.3514, "step": 2671 }, { "epoch": 0.26004866180048664, "grad_norm": 2.0012246197360493, "learning_rate": 8.67580907502875e-06, "loss": 0.5067, "step": 2672 }, { "epoch": 0.26014598540145983, "grad_norm": 1.5232176793280576, "learning_rate": 8.674740429907186e-06, "loss": 0.4174, "step": 2673 }, { "epoch": 0.2602433090024331, "grad_norm": 1.3322865976928646, "learning_rate": 8.673671419624405e-06, "loss": 0.4095, "step": 2674 }, { "epoch": 0.26034063260340634, "grad_norm": 1.514406481268828, "learning_rate": 8.672602044286638e-06, "loss": 0.5915, "step": 2675 }, { "epoch": 0.26043795620437954, "grad_norm": 1.528467413797325, "learning_rate": 8.67153230400015e-06, "loss": 0.4018, "step": 2676 }, { "epoch": 0.2605352798053528, "grad_norm": 1.4367698805538582, "learning_rate": 8.670462198871237e-06, "loss": 0.4115, "step": 2677 }, { "epoch": 0.26063260340632605, "grad_norm": 1.6984444092554742, "learning_rate": 8.66939172900624e-06, "loss": 0.59, "step": 2678 }, { "epoch": 0.26072992700729924, "grad_norm": 1.4698751482200727, "learning_rate": 8.668320894511534e-06, "loss": 0.4144, "step": 2679 }, { "epoch": 0.2608272506082725, "grad_norm": 1.5003641004534345, "learning_rate": 8.667249695493525e-06, "loss": 0.4294, "step": 2680 }, { "epoch": 0.26092457420924575, "grad_norm": 1.3123452231563197, "learning_rate": 8.666178132058659e-06, "loss": 0.3408, "step": 2681 }, { "epoch": 0.261021897810219, "grad_norm": 1.5184535738040659, "learning_rate": 8.665106204313418e-06, "loss": 0.3662, "step": 2682 }, { "epoch": 0.2611192214111922, "grad_norm": 1.0623024588559944, "learning_rate": 8.664033912364321e-06, "loss": 0.2953, "step": 2683 }, { "epoch": 0.26121654501216546, "grad_norm": 1.4112725317400583, "learning_rate": 8.662961256317923e-06, "loss": 0.3825, "step": 2684 }, { "epoch": 0.2613138686131387, "grad_norm": 2.2729536767377065, "learning_rate": 8.661888236280813e-06, "loss": 0.5791, "step": 2685 }, { "epoch": 0.2614111922141119, "grad_norm": 2.2747614305768504, "learning_rate": 8.660814852359617e-06, "loss": 0.4859, "step": 2686 }, { "epoch": 0.26150851581508516, "grad_norm": 1.6069562939941755, "learning_rate": 8.659741104661002e-06, "loss": 0.5254, "step": 2687 }, { "epoch": 0.2616058394160584, "grad_norm": 1.3624858995460438, "learning_rate": 8.658666993291662e-06, "loss": 0.3904, "step": 2688 }, { "epoch": 0.2617031630170316, "grad_norm": 1.2954398797770197, "learning_rate": 8.657592518358332e-06, "loss": 0.3789, "step": 2689 }, { "epoch": 0.26180048661800487, "grad_norm": 1.4158991903907718, "learning_rate": 8.656517679967788e-06, "loss": 0.3732, "step": 2690 }, { "epoch": 0.2618978102189781, "grad_norm": 1.3754641009755615, "learning_rate": 8.655442478226835e-06, "loss": 0.3035, "step": 2691 }, { "epoch": 0.2619951338199513, "grad_norm": 1.3522608722257456, "learning_rate": 8.654366913242316e-06, "loss": 0.347, "step": 2692 }, { "epoch": 0.2620924574209246, "grad_norm": 1.2764013704656585, "learning_rate": 8.65329098512111e-06, "loss": 0.4207, "step": 2693 }, { "epoch": 0.2621897810218978, "grad_norm": 1.4009476621873176, "learning_rate": 8.652214693970133e-06, "loss": 0.4628, "step": 2694 }, { "epoch": 0.262287104622871, "grad_norm": 1.3860597575903169, "learning_rate": 8.65113803989634e-06, "loss": 0.3844, "step": 2695 }, { "epoch": 0.2623844282238443, "grad_norm": 1.5636622874346966, "learning_rate": 8.650061023006711e-06, "loss": 0.6239, "step": 2696 }, { "epoch": 0.26248175182481753, "grad_norm": 1.3677003606993399, "learning_rate": 8.648983643408276e-06, "loss": 0.4319, "step": 2697 }, { "epoch": 0.26257907542579073, "grad_norm": 1.4720449620822884, "learning_rate": 8.647905901208096e-06, "loss": 0.4824, "step": 2698 }, { "epoch": 0.262676399026764, "grad_norm": 1.4180687903221385, "learning_rate": 8.646827796513262e-06, "loss": 0.539, "step": 2699 }, { "epoch": 0.26277372262773724, "grad_norm": 1.3679667840460958, "learning_rate": 8.64574932943091e-06, "loss": 0.4588, "step": 2700 }, { "epoch": 0.26287104622871044, "grad_norm": 1.125542933529368, "learning_rate": 8.644670500068205e-06, "loss": 0.3441, "step": 2701 }, { "epoch": 0.2629683698296837, "grad_norm": 1.5641789380613262, "learning_rate": 8.643591308532353e-06, "loss": 0.4998, "step": 2702 }, { "epoch": 0.26306569343065694, "grad_norm": 1.3425870342919086, "learning_rate": 8.642511754930592e-06, "loss": 0.4678, "step": 2703 }, { "epoch": 0.2631630170316302, "grad_norm": 1.3010588112101855, "learning_rate": 8.641431839370199e-06, "loss": 0.4005, "step": 2704 }, { "epoch": 0.2632603406326034, "grad_norm": 1.0067860306988832, "learning_rate": 8.640351561958487e-06, "loss": 0.2243, "step": 2705 }, { "epoch": 0.26335766423357665, "grad_norm": 1.4713856201410829, "learning_rate": 8.639270922802802e-06, "loss": 0.4325, "step": 2706 }, { "epoch": 0.2634549878345499, "grad_norm": 1.55962351192921, "learning_rate": 8.63818992201053e-06, "loss": 0.5307, "step": 2707 }, { "epoch": 0.2635523114355231, "grad_norm": 1.4073629002175063, "learning_rate": 8.637108559689088e-06, "loss": 0.3329, "step": 2708 }, { "epoch": 0.26364963503649635, "grad_norm": 1.2827086170801953, "learning_rate": 8.636026835945933e-06, "loss": 0.3095, "step": 2709 }, { "epoch": 0.2637469586374696, "grad_norm": 1.4100209855486194, "learning_rate": 8.634944750888556e-06, "loss": 0.3033, "step": 2710 }, { "epoch": 0.2638442822384428, "grad_norm": 1.343279822840104, "learning_rate": 8.633862304624484e-06, "loss": 0.402, "step": 2711 }, { "epoch": 0.26394160583941606, "grad_norm": 1.4374516520455163, "learning_rate": 8.632779497261284e-06, "loss": 0.4574, "step": 2712 }, { "epoch": 0.2640389294403893, "grad_norm": 1.1554648336740065, "learning_rate": 8.63169632890655e-06, "loss": 0.3091, "step": 2713 }, { "epoch": 0.2641362530413625, "grad_norm": 1.5304191047752203, "learning_rate": 8.630612799667923e-06, "loss": 0.5392, "step": 2714 }, { "epoch": 0.26423357664233577, "grad_norm": 1.9364214941018973, "learning_rate": 8.629528909653067e-06, "loss": 0.4705, "step": 2715 }, { "epoch": 0.264330900243309, "grad_norm": 1.5176007479008755, "learning_rate": 8.628444658969694e-06, "loss": 0.3969, "step": 2716 }, { "epoch": 0.2644282238442822, "grad_norm": 1.3882529784475808, "learning_rate": 8.627360047725543e-06, "loss": 0.4672, "step": 2717 }, { "epoch": 0.26452554744525547, "grad_norm": 1.0419873824719341, "learning_rate": 8.626275076028397e-06, "loss": 0.2247, "step": 2718 }, { "epoch": 0.2646228710462287, "grad_norm": 1.4147177174052021, "learning_rate": 8.625189743986068e-06, "loss": 0.3922, "step": 2719 }, { "epoch": 0.2647201946472019, "grad_norm": 1.3513629744004096, "learning_rate": 8.624104051706405e-06, "loss": 0.415, "step": 2720 }, { "epoch": 0.2648175182481752, "grad_norm": 1.3701041364422066, "learning_rate": 8.623017999297294e-06, "loss": 0.4329, "step": 2721 }, { "epoch": 0.26491484184914843, "grad_norm": 1.5102917148163044, "learning_rate": 8.621931586866658e-06, "loss": 0.4104, "step": 2722 }, { "epoch": 0.26501216545012163, "grad_norm": 1.4836677874290423, "learning_rate": 8.620844814522455e-06, "loss": 0.5131, "step": 2723 }, { "epoch": 0.2651094890510949, "grad_norm": 1.2607364196409017, "learning_rate": 8.619757682372675e-06, "loss": 0.3856, "step": 2724 }, { "epoch": 0.26520681265206814, "grad_norm": 1.4082529003642341, "learning_rate": 8.61867019052535e-06, "loss": 0.4719, "step": 2725 }, { "epoch": 0.2653041362530414, "grad_norm": 1.4276001080419702, "learning_rate": 8.617582339088545e-06, "loss": 0.2825, "step": 2726 }, { "epoch": 0.2654014598540146, "grad_norm": 1.4331001450603844, "learning_rate": 8.61649412817036e-06, "loss": 0.5104, "step": 2727 }, { "epoch": 0.26549878345498784, "grad_norm": 1.358868383954866, "learning_rate": 8.615405557878929e-06, "loss": 0.4359, "step": 2728 }, { "epoch": 0.2655961070559611, "grad_norm": 1.678463370024911, "learning_rate": 8.614316628322427e-06, "loss": 0.4658, "step": 2729 }, { "epoch": 0.2656934306569343, "grad_norm": 1.2268291596580612, "learning_rate": 8.61322733960906e-06, "loss": 0.2337, "step": 2730 }, { "epoch": 0.26579075425790755, "grad_norm": 0.9437944818586388, "learning_rate": 8.61213769184707e-06, "loss": 0.2525, "step": 2731 }, { "epoch": 0.2658880778588808, "grad_norm": 1.2480121542051432, "learning_rate": 8.611047685144737e-06, "loss": 0.2656, "step": 2732 }, { "epoch": 0.265985401459854, "grad_norm": 1.5255853623894704, "learning_rate": 8.609957319610377e-06, "loss": 0.5071, "step": 2733 }, { "epoch": 0.26608272506082725, "grad_norm": 1.5847632660353408, "learning_rate": 8.60886659535234e-06, "loss": 0.4018, "step": 2734 }, { "epoch": 0.2661800486618005, "grad_norm": 1.3469310633769445, "learning_rate": 8.60777551247901e-06, "loss": 0.451, "step": 2735 }, { "epoch": 0.2662773722627737, "grad_norm": 1.3995570810499534, "learning_rate": 8.60668407109881e-06, "loss": 0.4991, "step": 2736 }, { "epoch": 0.26637469586374696, "grad_norm": 1.5198269828404072, "learning_rate": 8.605592271320199e-06, "loss": 0.4266, "step": 2737 }, { "epoch": 0.2664720194647202, "grad_norm": 1.3040716122405567, "learning_rate": 8.604500113251666e-06, "loss": 0.3465, "step": 2738 }, { "epoch": 0.2665693430656934, "grad_norm": 1.3643506509353014, "learning_rate": 8.60340759700174e-06, "loss": 0.4355, "step": 2739 }, { "epoch": 0.26666666666666666, "grad_norm": 1.026074804296968, "learning_rate": 8.602314722678989e-06, "loss": 0.2507, "step": 2740 }, { "epoch": 0.2667639902676399, "grad_norm": 1.3894972782664292, "learning_rate": 8.601221490392009e-06, "loss": 0.3981, "step": 2741 }, { "epoch": 0.2668613138686131, "grad_norm": 1.3071238902768438, "learning_rate": 8.600127900249435e-06, "loss": 0.5138, "step": 2742 }, { "epoch": 0.26695863746958637, "grad_norm": 1.61583752885221, "learning_rate": 8.59903395235994e-06, "loss": 0.5072, "step": 2743 }, { "epoch": 0.2670559610705596, "grad_norm": 1.3679578518174673, "learning_rate": 8.597939646832227e-06, "loss": 0.3754, "step": 2744 }, { "epoch": 0.2671532846715328, "grad_norm": 1.0943121419181938, "learning_rate": 8.596844983775042e-06, "loss": 0.2457, "step": 2745 }, { "epoch": 0.2672506082725061, "grad_norm": 1.176479145152164, "learning_rate": 8.59574996329716e-06, "loss": 0.3687, "step": 2746 }, { "epoch": 0.2673479318734793, "grad_norm": 1.2666642902167933, "learning_rate": 8.594654585507393e-06, "loss": 0.2664, "step": 2747 }, { "epoch": 0.2674452554744526, "grad_norm": 1.3951377938692817, "learning_rate": 8.59355885051459e-06, "loss": 0.4035, "step": 2748 }, { "epoch": 0.2675425790754258, "grad_norm": 1.2722832533001889, "learning_rate": 8.592462758427635e-06, "loss": 0.4643, "step": 2749 }, { "epoch": 0.26763990267639903, "grad_norm": 1.2157588835981379, "learning_rate": 8.59136630935545e-06, "loss": 0.3612, "step": 2750 }, { "epoch": 0.2677372262773723, "grad_norm": 1.0785566378114326, "learning_rate": 8.590269503406986e-06, "loss": 0.3403, "step": 2751 }, { "epoch": 0.2678345498783455, "grad_norm": 1.2447292785758555, "learning_rate": 8.589172340691235e-06, "loss": 0.3873, "step": 2752 }, { "epoch": 0.26793187347931874, "grad_norm": 1.166378916722292, "learning_rate": 8.588074821317222e-06, "loss": 0.3264, "step": 2753 }, { "epoch": 0.268029197080292, "grad_norm": 1.2197572995933224, "learning_rate": 8.586976945394008e-06, "loss": 0.3793, "step": 2754 }, { "epoch": 0.2681265206812652, "grad_norm": 1.6234832434134598, "learning_rate": 8.58587871303069e-06, "loss": 0.5521, "step": 2755 }, { "epoch": 0.26822384428223844, "grad_norm": 1.4760533014923396, "learning_rate": 8.584780124336403e-06, "loss": 0.5024, "step": 2756 }, { "epoch": 0.2683211678832117, "grad_norm": 1.4156240197993037, "learning_rate": 8.58368117942031e-06, "loss": 0.2848, "step": 2757 }, { "epoch": 0.2684184914841849, "grad_norm": 1.9092848960981135, "learning_rate": 8.582581878391614e-06, "loss": 0.4053, "step": 2758 }, { "epoch": 0.26851581508515815, "grad_norm": 1.2158050168465575, "learning_rate": 8.581482221359557e-06, "loss": 0.2709, "step": 2759 }, { "epoch": 0.2686131386861314, "grad_norm": 1.5515245630825936, "learning_rate": 8.580382208433408e-06, "loss": 0.4549, "step": 2760 }, { "epoch": 0.2687104622871046, "grad_norm": 1.6603384837941395, "learning_rate": 8.57928183972248e-06, "loss": 0.3316, "step": 2761 }, { "epoch": 0.26880778588807785, "grad_norm": 1.5595744401068579, "learning_rate": 8.578181115336114e-06, "loss": 0.5733, "step": 2762 }, { "epoch": 0.2689051094890511, "grad_norm": 1.3547786308004384, "learning_rate": 8.577080035383693e-06, "loss": 0.5295, "step": 2763 }, { "epoch": 0.2690024330900243, "grad_norm": 1.2889595684224195, "learning_rate": 8.57597859997463e-06, "loss": 0.3876, "step": 2764 }, { "epoch": 0.26909975669099756, "grad_norm": 1.5401948742368967, "learning_rate": 8.574876809218375e-06, "loss": 0.4847, "step": 2765 }, { "epoch": 0.2691970802919708, "grad_norm": 1.5886773556984544, "learning_rate": 8.573774663224414e-06, "loss": 0.4746, "step": 2766 }, { "epoch": 0.269294403892944, "grad_norm": 1.2747463684628804, "learning_rate": 8.572672162102269e-06, "loss": 0.2568, "step": 2767 }, { "epoch": 0.26939172749391727, "grad_norm": 1.1674673988315882, "learning_rate": 8.571569305961495e-06, "loss": 0.4329, "step": 2768 }, { "epoch": 0.2694890510948905, "grad_norm": 1.6882113617461265, "learning_rate": 8.570466094911684e-06, "loss": 0.6891, "step": 2769 }, { "epoch": 0.2695863746958638, "grad_norm": 1.6660737969996857, "learning_rate": 8.569362529062461e-06, "loss": 0.5887, "step": 2770 }, { "epoch": 0.26968369829683697, "grad_norm": 1.1653044559020052, "learning_rate": 8.568258608523491e-06, "loss": 0.2452, "step": 2771 }, { "epoch": 0.2697810218978102, "grad_norm": 1.5681206888540218, "learning_rate": 8.567154333404471e-06, "loss": 0.4952, "step": 2772 }, { "epoch": 0.2698783454987835, "grad_norm": 1.3994591247160806, "learning_rate": 8.56604970381513e-06, "loss": 0.2848, "step": 2773 }, { "epoch": 0.2699756690997567, "grad_norm": 1.300192393224716, "learning_rate": 8.564944719865238e-06, "loss": 0.3924, "step": 2774 }, { "epoch": 0.27007299270072993, "grad_norm": 1.4412015443912716, "learning_rate": 8.5638393816646e-06, "loss": 0.4531, "step": 2775 }, { "epoch": 0.2701703163017032, "grad_norm": 1.4360872043281558, "learning_rate": 8.56273368932305e-06, "loss": 0.4571, "step": 2776 }, { "epoch": 0.2702676399026764, "grad_norm": 1.5811581309774965, "learning_rate": 8.561627642950465e-06, "loss": 0.4638, "step": 2777 }, { "epoch": 0.27036496350364964, "grad_norm": 1.7924696283680308, "learning_rate": 8.560521242656751e-06, "loss": 0.2922, "step": 2778 }, { "epoch": 0.2704622871046229, "grad_norm": 1.7929283253885162, "learning_rate": 8.559414488551854e-06, "loss": 0.6197, "step": 2779 }, { "epoch": 0.2705596107055961, "grad_norm": 1.5593955671219286, "learning_rate": 8.558307380745751e-06, "loss": 0.5448, "step": 2780 }, { "epoch": 0.27065693430656934, "grad_norm": 1.3760682204767343, "learning_rate": 8.557199919348455e-06, "loss": 0.4434, "step": 2781 }, { "epoch": 0.2707542579075426, "grad_norm": 3.203989647256839, "learning_rate": 8.556092104470019e-06, "loss": 0.4323, "step": 2782 }, { "epoch": 0.2708515815085158, "grad_norm": 1.3460764595466628, "learning_rate": 8.554983936220525e-06, "loss": 0.3367, "step": 2783 }, { "epoch": 0.27094890510948905, "grad_norm": 1.6160732245190643, "learning_rate": 8.553875414710088e-06, "loss": 0.5301, "step": 2784 }, { "epoch": 0.2710462287104623, "grad_norm": 1.5749454761331767, "learning_rate": 8.552766540048872e-06, "loss": 0.3741, "step": 2785 }, { "epoch": 0.2711435523114355, "grad_norm": 1.150423059184381, "learning_rate": 8.551657312347057e-06, "loss": 0.2796, "step": 2786 }, { "epoch": 0.27124087591240875, "grad_norm": 1.4217054664233575, "learning_rate": 8.550547731714874e-06, "loss": 0.4543, "step": 2787 }, { "epoch": 0.271338199513382, "grad_norm": 1.470206005686861, "learning_rate": 8.54943779826258e-06, "loss": 0.438, "step": 2788 }, { "epoch": 0.2714355231143552, "grad_norm": 1.5766219733733982, "learning_rate": 8.54832751210047e-06, "loss": 0.4966, "step": 2789 }, { "epoch": 0.27153284671532846, "grad_norm": 1.2135102045567707, "learning_rate": 8.547216873338876e-06, "loss": 0.358, "step": 2790 }, { "epoch": 0.2716301703163017, "grad_norm": 1.4595225616938101, "learning_rate": 8.546105882088158e-06, "loss": 0.2225, "step": 2791 }, { "epoch": 0.27172749391727496, "grad_norm": 1.3363330099445299, "learning_rate": 8.54499453845872e-06, "loss": 0.3914, "step": 2792 }, { "epoch": 0.27182481751824816, "grad_norm": 1.3646141902938869, "learning_rate": 8.543882842560997e-06, "loss": 0.4558, "step": 2793 }, { "epoch": 0.2719221411192214, "grad_norm": 1.3464180828493995, "learning_rate": 8.542770794505456e-06, "loss": 0.4786, "step": 2794 }, { "epoch": 0.27201946472019467, "grad_norm": 1.044551377255888, "learning_rate": 8.541658394402606e-06, "loss": 0.303, "step": 2795 }, { "epoch": 0.27211678832116787, "grad_norm": 1.6706499263846184, "learning_rate": 8.540545642362982e-06, "loss": 0.4033, "step": 2796 }, { "epoch": 0.2722141119221411, "grad_norm": 1.3164784669169094, "learning_rate": 8.539432538497162e-06, "loss": 0.4343, "step": 2797 }, { "epoch": 0.2723114355231144, "grad_norm": 1.6044535524867656, "learning_rate": 8.538319082915757e-06, "loss": 0.3641, "step": 2798 }, { "epoch": 0.2724087591240876, "grad_norm": 1.9897822202433566, "learning_rate": 8.537205275729406e-06, "loss": 0.48, "step": 2799 }, { "epoch": 0.2725060827250608, "grad_norm": 1.4110579632506512, "learning_rate": 8.536091117048794e-06, "loss": 0.4798, "step": 2800 }, { "epoch": 0.2726034063260341, "grad_norm": 1.4415607317920478, "learning_rate": 8.534976606984636e-06, "loss": 0.343, "step": 2801 }, { "epoch": 0.2727007299270073, "grad_norm": 1.6363482727427716, "learning_rate": 8.53386174564768e-06, "loss": 0.6087, "step": 2802 }, { "epoch": 0.27279805352798053, "grad_norm": 1.1272383780084416, "learning_rate": 8.532746533148708e-06, "loss": 0.2444, "step": 2803 }, { "epoch": 0.2728953771289538, "grad_norm": 1.712140222332907, "learning_rate": 8.531630969598544e-06, "loss": 0.6702, "step": 2804 }, { "epoch": 0.272992700729927, "grad_norm": 1.474485197586056, "learning_rate": 8.530515055108038e-06, "loss": 0.3876, "step": 2805 }, { "epoch": 0.27309002433090024, "grad_norm": 1.2926370708159094, "learning_rate": 8.529398789788082e-06, "loss": 0.3239, "step": 2806 }, { "epoch": 0.2731873479318735, "grad_norm": 1.1171205940753008, "learning_rate": 8.528282173749599e-06, "loss": 0.3135, "step": 2807 }, { "epoch": 0.2732846715328467, "grad_norm": 1.3561762741371761, "learning_rate": 8.527165207103546e-06, "loss": 0.4686, "step": 2808 }, { "epoch": 0.27338199513381994, "grad_norm": 1.3082129080843141, "learning_rate": 8.52604788996092e-06, "loss": 0.4274, "step": 2809 }, { "epoch": 0.2734793187347932, "grad_norm": 1.2958697823961909, "learning_rate": 8.524930222432748e-06, "loss": 0.4334, "step": 2810 }, { "epoch": 0.2735766423357664, "grad_norm": 1.4541266485936315, "learning_rate": 8.523812204630093e-06, "loss": 0.5685, "step": 2811 }, { "epoch": 0.27367396593673965, "grad_norm": 1.3303596097899522, "learning_rate": 8.522693836664052e-06, "loss": 0.4305, "step": 2812 }, { "epoch": 0.2737712895377129, "grad_norm": 1.220005269273729, "learning_rate": 8.521575118645761e-06, "loss": 0.4281, "step": 2813 }, { "epoch": 0.27386861313868616, "grad_norm": 1.0981673276035366, "learning_rate": 8.520456050686384e-06, "loss": 0.3641, "step": 2814 }, { "epoch": 0.27396593673965935, "grad_norm": 1.4310281439998578, "learning_rate": 8.519336632897128e-06, "loss": 0.557, "step": 2815 }, { "epoch": 0.2740632603406326, "grad_norm": 1.345841620727785, "learning_rate": 8.518216865389227e-06, "loss": 0.3991, "step": 2816 }, { "epoch": 0.27416058394160586, "grad_norm": 1.6650753610183784, "learning_rate": 8.517096748273951e-06, "loss": 0.3624, "step": 2817 }, { "epoch": 0.27425790754257906, "grad_norm": 1.2633026385457689, "learning_rate": 8.515976281662613e-06, "loss": 0.349, "step": 2818 }, { "epoch": 0.2743552311435523, "grad_norm": 1.392024932172172, "learning_rate": 8.514855465666546e-06, "loss": 0.4514, "step": 2819 }, { "epoch": 0.27445255474452557, "grad_norm": 1.4295145565971665, "learning_rate": 8.513734300397135e-06, "loss": 0.5668, "step": 2820 }, { "epoch": 0.27454987834549877, "grad_norm": 1.0967459926110283, "learning_rate": 8.512612785965787e-06, "loss": 0.1808, "step": 2821 }, { "epoch": 0.274647201946472, "grad_norm": 1.4843839946273536, "learning_rate": 8.511490922483946e-06, "loss": 0.4352, "step": 2822 }, { "epoch": 0.2747445255474453, "grad_norm": 1.339649820333997, "learning_rate": 8.510368710063093e-06, "loss": 0.3137, "step": 2823 }, { "epoch": 0.27484184914841847, "grad_norm": 1.32567882782868, "learning_rate": 8.509246148814745e-06, "loss": 0.4089, "step": 2824 }, { "epoch": 0.2749391727493917, "grad_norm": 1.2497731956714773, "learning_rate": 8.50812323885045e-06, "loss": 0.382, "step": 2825 }, { "epoch": 0.275036496350365, "grad_norm": 1.5771259884963846, "learning_rate": 8.506999980281791e-06, "loss": 0.501, "step": 2826 }, { "epoch": 0.2751338199513382, "grad_norm": 1.3295615561309837, "learning_rate": 8.505876373220393e-06, "loss": 0.3635, "step": 2827 }, { "epoch": 0.27523114355231143, "grad_norm": 1.55543645713159, "learning_rate": 8.504752417777899e-06, "loss": 0.2986, "step": 2828 }, { "epoch": 0.2753284671532847, "grad_norm": 1.421283473121396, "learning_rate": 8.503628114066008e-06, "loss": 0.4931, "step": 2829 }, { "epoch": 0.2754257907542579, "grad_norm": 1.1988827610585986, "learning_rate": 8.502503462196435e-06, "loss": 0.3272, "step": 2830 }, { "epoch": 0.27552311435523114, "grad_norm": 1.6163491550131937, "learning_rate": 8.501378462280941e-06, "loss": 0.5794, "step": 2831 }, { "epoch": 0.2756204379562044, "grad_norm": 1.6499795796835799, "learning_rate": 8.500253114431316e-06, "loss": 0.3668, "step": 2832 }, { "epoch": 0.27571776155717764, "grad_norm": 1.7305434923413188, "learning_rate": 8.499127418759388e-06, "loss": 0.5291, "step": 2833 }, { "epoch": 0.27581508515815084, "grad_norm": 1.4062980643641485, "learning_rate": 8.498001375377018e-06, "loss": 0.4645, "step": 2834 }, { "epoch": 0.2759124087591241, "grad_norm": 1.2961260919749351, "learning_rate": 8.496874984396101e-06, "loss": 0.2517, "step": 2835 }, { "epoch": 0.27600973236009735, "grad_norm": 1.4273972641674804, "learning_rate": 8.495748245928568e-06, "loss": 0.4705, "step": 2836 }, { "epoch": 0.27610705596107055, "grad_norm": 1.1525746776855315, "learning_rate": 8.494621160086383e-06, "loss": 0.3747, "step": 2837 }, { "epoch": 0.2762043795620438, "grad_norm": 1.6083708658269757, "learning_rate": 8.493493726981545e-06, "loss": 0.5754, "step": 2838 }, { "epoch": 0.27630170316301705, "grad_norm": 1.6380932846987073, "learning_rate": 8.492365946726087e-06, "loss": 0.4668, "step": 2839 }, { "epoch": 0.27639902676399025, "grad_norm": 1.3587028332396105, "learning_rate": 8.491237819432081e-06, "loss": 0.3466, "step": 2840 }, { "epoch": 0.2764963503649635, "grad_norm": 1.5812508624530597, "learning_rate": 8.490109345211625e-06, "loss": 0.628, "step": 2841 }, { "epoch": 0.27659367396593676, "grad_norm": 1.359461682943084, "learning_rate": 8.48898052417686e-06, "loss": 0.4799, "step": 2842 }, { "epoch": 0.27669099756690996, "grad_norm": 1.3773089875645015, "learning_rate": 8.487851356439953e-06, "loss": 0.3064, "step": 2843 }, { "epoch": 0.2767883211678832, "grad_norm": 1.445505572645753, "learning_rate": 8.486721842113114e-06, "loss": 0.4629, "step": 2844 }, { "epoch": 0.27688564476885646, "grad_norm": 2.1729540442826796, "learning_rate": 8.485591981308584e-06, "loss": 0.501, "step": 2845 }, { "epoch": 0.27698296836982966, "grad_norm": 1.2698072866971275, "learning_rate": 8.484461774138635e-06, "loss": 0.3354, "step": 2846 }, { "epoch": 0.2770802919708029, "grad_norm": 1.2270792461817257, "learning_rate": 8.483331220715578e-06, "loss": 0.2925, "step": 2847 }, { "epoch": 0.27717761557177617, "grad_norm": 1.4982940191444252, "learning_rate": 8.482200321151757e-06, "loss": 0.4372, "step": 2848 }, { "epoch": 0.27727493917274937, "grad_norm": 1.7962422459275051, "learning_rate": 8.48106907555955e-06, "loss": 0.2514, "step": 2849 }, { "epoch": 0.2773722627737226, "grad_norm": 1.1765428275481227, "learning_rate": 8.479937484051368e-06, "loss": 0.2466, "step": 2850 }, { "epoch": 0.2774695863746959, "grad_norm": 1.3671035304850088, "learning_rate": 8.47880554673966e-06, "loss": 0.4388, "step": 2851 }, { "epoch": 0.2775669099756691, "grad_norm": 1.584083262413021, "learning_rate": 8.477673263736908e-06, "loss": 0.3117, "step": 2852 }, { "epoch": 0.2776642335766423, "grad_norm": 1.6251518472003594, "learning_rate": 8.476540635155623e-06, "loss": 0.4661, "step": 2853 }, { "epoch": 0.2777615571776156, "grad_norm": 1.6392857489539867, "learning_rate": 8.475407661108361e-06, "loss": 0.354, "step": 2854 }, { "epoch": 0.27785888077858883, "grad_norm": 1.3195625296951223, "learning_rate": 8.474274341707702e-06, "loss": 0.3744, "step": 2855 }, { "epoch": 0.27795620437956203, "grad_norm": 1.34410915454318, "learning_rate": 8.473140677066267e-06, "loss": 0.4069, "step": 2856 }, { "epoch": 0.2780535279805353, "grad_norm": 1.0527413957181246, "learning_rate": 8.472006667296709e-06, "loss": 0.2776, "step": 2857 }, { "epoch": 0.27815085158150854, "grad_norm": 1.496471387248685, "learning_rate": 8.470872312511714e-06, "loss": 0.3642, "step": 2858 }, { "epoch": 0.27824817518248174, "grad_norm": 1.532429299396127, "learning_rate": 8.469737612824001e-06, "loss": 0.44, "step": 2859 }, { "epoch": 0.278345498783455, "grad_norm": 1.601112711944827, "learning_rate": 8.468602568346332e-06, "loss": 0.421, "step": 2860 }, { "epoch": 0.27844282238442825, "grad_norm": 1.5148720198103927, "learning_rate": 8.467467179191493e-06, "loss": 0.5258, "step": 2861 }, { "epoch": 0.27854014598540144, "grad_norm": 1.573048120862393, "learning_rate": 8.466331445472308e-06, "loss": 0.4507, "step": 2862 }, { "epoch": 0.2786374695863747, "grad_norm": 1.3938890789758775, "learning_rate": 8.465195367301639e-06, "loss": 0.3365, "step": 2863 }, { "epoch": 0.27873479318734795, "grad_norm": 1.6895380781567202, "learning_rate": 8.464058944792375e-06, "loss": 0.4132, "step": 2864 }, { "epoch": 0.27883211678832115, "grad_norm": 1.6880546647255488, "learning_rate": 8.462922178057444e-06, "loss": 0.2605, "step": 2865 }, { "epoch": 0.2789294403892944, "grad_norm": 1.491755717654464, "learning_rate": 8.46178506720981e-06, "loss": 0.3983, "step": 2866 }, { "epoch": 0.27902676399026766, "grad_norm": 1.5848666178901887, "learning_rate": 8.460647612362464e-06, "loss": 0.5101, "step": 2867 }, { "epoch": 0.27912408759124085, "grad_norm": 1.3442317187907376, "learning_rate": 8.459509813628437e-06, "loss": 0.458, "step": 2868 }, { "epoch": 0.2792214111922141, "grad_norm": 1.8095809186860319, "learning_rate": 8.458371671120795e-06, "loss": 0.382, "step": 2869 }, { "epoch": 0.27931873479318736, "grad_norm": 0.9909926300929587, "learning_rate": 8.457233184952635e-06, "loss": 0.2292, "step": 2870 }, { "epoch": 0.27941605839416056, "grad_norm": 1.7013118787018624, "learning_rate": 8.456094355237086e-06, "loss": 0.6861, "step": 2871 }, { "epoch": 0.2795133819951338, "grad_norm": 3.4293212695090025, "learning_rate": 8.45495518208732e-06, "loss": 0.3233, "step": 2872 }, { "epoch": 0.27961070559610707, "grad_norm": 1.4903797163776311, "learning_rate": 8.45381566561653e-06, "loss": 0.3231, "step": 2873 }, { "epoch": 0.27970802919708027, "grad_norm": 1.5615177882070261, "learning_rate": 8.452675805937956e-06, "loss": 0.4125, "step": 2874 }, { "epoch": 0.2798053527980535, "grad_norm": 1.4099046900170047, "learning_rate": 8.451535603164865e-06, "loss": 0.4967, "step": 2875 }, { "epoch": 0.2799026763990268, "grad_norm": 1.383217014263479, "learning_rate": 8.450395057410561e-06, "loss": 0.3411, "step": 2876 }, { "epoch": 0.28, "grad_norm": 1.2661588037606646, "learning_rate": 8.449254168788377e-06, "loss": 0.3734, "step": 2877 }, { "epoch": 0.2800973236009732, "grad_norm": 1.4107359648240771, "learning_rate": 8.448112937411689e-06, "loss": 0.4765, "step": 2878 }, { "epoch": 0.2801946472019465, "grad_norm": 1.567373989947911, "learning_rate": 8.446971363393897e-06, "loss": 0.5806, "step": 2879 }, { "epoch": 0.28029197080291973, "grad_norm": 1.5980994022663064, "learning_rate": 8.445829446848442e-06, "loss": 0.3765, "step": 2880 }, { "epoch": 0.28038929440389293, "grad_norm": 1.5582627635759285, "learning_rate": 8.444687187888798e-06, "loss": 0.3838, "step": 2881 }, { "epoch": 0.2804866180048662, "grad_norm": 2.097365147798996, "learning_rate": 8.44354458662847e-06, "loss": 0.6467, "step": 2882 }, { "epoch": 0.28058394160583944, "grad_norm": 1.5302257615618868, "learning_rate": 8.442401643181e-06, "loss": 0.4415, "step": 2883 }, { "epoch": 0.28068126520681264, "grad_norm": 1.1646338986978766, "learning_rate": 8.441258357659962e-06, "loss": 0.3176, "step": 2884 }, { "epoch": 0.2807785888077859, "grad_norm": 1.2287928718701633, "learning_rate": 8.440114730178968e-06, "loss": 0.4175, "step": 2885 }, { "epoch": 0.28087591240875914, "grad_norm": 1.4416072881006319, "learning_rate": 8.438970760851658e-06, "loss": 0.4838, "step": 2886 }, { "epoch": 0.28097323600973234, "grad_norm": 1.319870372533973, "learning_rate": 8.437826449791709e-06, "loss": 0.3421, "step": 2887 }, { "epoch": 0.2810705596107056, "grad_norm": 1.6261475252650914, "learning_rate": 8.436681797112833e-06, "loss": 0.5019, "step": 2888 }, { "epoch": 0.28116788321167885, "grad_norm": 1.6203143716652342, "learning_rate": 8.435536802928774e-06, "loss": 0.4282, "step": 2889 }, { "epoch": 0.28126520681265205, "grad_norm": 1.4127079920263665, "learning_rate": 8.434391467353312e-06, "loss": 0.4542, "step": 2890 }, { "epoch": 0.2813625304136253, "grad_norm": 1.1756885783532405, "learning_rate": 8.433245790500258e-06, "loss": 0.3563, "step": 2891 }, { "epoch": 0.28145985401459855, "grad_norm": 1.1824997482138238, "learning_rate": 8.43209977248346e-06, "loss": 0.3628, "step": 2892 }, { "epoch": 0.28155717761557175, "grad_norm": 1.4280724079623635, "learning_rate": 8.430953413416798e-06, "loss": 0.446, "step": 2893 }, { "epoch": 0.281654501216545, "grad_norm": 1.0710350994410123, "learning_rate": 8.429806713414188e-06, "loss": 0.2016, "step": 2894 }, { "epoch": 0.28175182481751826, "grad_norm": 1.453985226232095, "learning_rate": 8.428659672589574e-06, "loss": 0.4325, "step": 2895 }, { "epoch": 0.28184914841849146, "grad_norm": 1.3045306996673216, "learning_rate": 8.427512291056943e-06, "loss": 0.3838, "step": 2896 }, { "epoch": 0.2819464720194647, "grad_norm": 1.483337521636422, "learning_rate": 8.426364568930309e-06, "loss": 0.4212, "step": 2897 }, { "epoch": 0.28204379562043796, "grad_norm": 1.0901324802348065, "learning_rate": 8.425216506323721e-06, "loss": 0.2392, "step": 2898 }, { "epoch": 0.2821411192214112, "grad_norm": 1.3761268679827663, "learning_rate": 8.424068103351264e-06, "loss": 0.4459, "step": 2899 }, { "epoch": 0.2822384428223844, "grad_norm": 1.461105500215717, "learning_rate": 8.422919360127053e-06, "loss": 0.5018, "step": 2900 }, { "epoch": 0.28233576642335767, "grad_norm": 1.4314465150478046, "learning_rate": 8.421770276765245e-06, "loss": 0.4474, "step": 2901 }, { "epoch": 0.2824330900243309, "grad_norm": 1.6060806185106393, "learning_rate": 8.420620853380018e-06, "loss": 0.5798, "step": 2902 }, { "epoch": 0.2825304136253041, "grad_norm": 1.4468000025910832, "learning_rate": 8.419471090085596e-06, "loss": 0.5597, "step": 2903 }, { "epoch": 0.2826277372262774, "grad_norm": 5.585104457387235, "learning_rate": 8.41832098699623e-06, "loss": 0.3493, "step": 2904 }, { "epoch": 0.28272506082725063, "grad_norm": 1.3577816273786794, "learning_rate": 8.417170544226205e-06, "loss": 0.3262, "step": 2905 }, { "epoch": 0.2828223844282238, "grad_norm": 1.1546363912171016, "learning_rate": 8.416019761889845e-06, "loss": 0.3691, "step": 2906 }, { "epoch": 0.2829197080291971, "grad_norm": 1.3224407401265832, "learning_rate": 8.4148686401015e-06, "loss": 0.3079, "step": 2907 }, { "epoch": 0.28301703163017033, "grad_norm": 1.5947860641264806, "learning_rate": 8.413717178975558e-06, "loss": 0.277, "step": 2908 }, { "epoch": 0.28311435523114353, "grad_norm": 1.343045870800707, "learning_rate": 8.412565378626442e-06, "loss": 0.3448, "step": 2909 }, { "epoch": 0.2832116788321168, "grad_norm": 1.5567901041780798, "learning_rate": 8.411413239168609e-06, "loss": 0.3954, "step": 2910 }, { "epoch": 0.28330900243309004, "grad_norm": 1.5232536009297208, "learning_rate": 8.410260760716545e-06, "loss": 0.5103, "step": 2911 }, { "epoch": 0.28340632603406324, "grad_norm": 1.2493384040941995, "learning_rate": 8.409107943384773e-06, "loss": 0.3671, "step": 2912 }, { "epoch": 0.2835036496350365, "grad_norm": 1.246217249188392, "learning_rate": 8.407954787287848e-06, "loss": 0.4112, "step": 2913 }, { "epoch": 0.28360097323600975, "grad_norm": 1.2012340002353967, "learning_rate": 8.406801292540364e-06, "loss": 0.3769, "step": 2914 }, { "epoch": 0.28369829683698294, "grad_norm": 1.51749407168492, "learning_rate": 8.405647459256939e-06, "loss": 0.5515, "step": 2915 }, { "epoch": 0.2837956204379562, "grad_norm": 1.1589770762667257, "learning_rate": 8.404493287552232e-06, "loss": 0.2577, "step": 2916 }, { "epoch": 0.28389294403892945, "grad_norm": 1.5139932402052954, "learning_rate": 8.403338777540936e-06, "loss": 0.4796, "step": 2917 }, { "epoch": 0.28399026763990265, "grad_norm": 1.5544290759133006, "learning_rate": 8.402183929337774e-06, "loss": 0.4594, "step": 2918 }, { "epoch": 0.2840875912408759, "grad_norm": 1.3525572627526583, "learning_rate": 8.401028743057503e-06, "loss": 0.3978, "step": 2919 }, { "epoch": 0.28418491484184916, "grad_norm": 1.3610916698563846, "learning_rate": 8.399873218814916e-06, "loss": 0.4308, "step": 2920 }, { "epoch": 0.2842822384428224, "grad_norm": 1.2060322500759533, "learning_rate": 8.398717356724837e-06, "loss": 0.482, "step": 2921 }, { "epoch": 0.2843795620437956, "grad_norm": 1.152727586861314, "learning_rate": 8.397561156902126e-06, "loss": 0.3862, "step": 2922 }, { "epoch": 0.28447688564476886, "grad_norm": 1.6371195081735355, "learning_rate": 8.396404619461673e-06, "loss": 0.684, "step": 2923 }, { "epoch": 0.2845742092457421, "grad_norm": 1.4756480619833048, "learning_rate": 8.395247744518407e-06, "loss": 0.4432, "step": 2924 }, { "epoch": 0.2846715328467153, "grad_norm": 1.3495353534897125, "learning_rate": 8.394090532187286e-06, "loss": 0.4574, "step": 2925 }, { "epoch": 0.28476885644768857, "grad_norm": 1.361248347874279, "learning_rate": 8.392932982583301e-06, "loss": 0.3117, "step": 2926 }, { "epoch": 0.2848661800486618, "grad_norm": 1.5493409509214389, "learning_rate": 8.391775095821481e-06, "loss": 0.5949, "step": 2927 }, { "epoch": 0.284963503649635, "grad_norm": 1.5159424124979992, "learning_rate": 8.390616872016886e-06, "loss": 0.612, "step": 2928 }, { "epoch": 0.2850608272506083, "grad_norm": 0.9819694068633834, "learning_rate": 8.389458311284606e-06, "loss": 0.2407, "step": 2929 }, { "epoch": 0.2851581508515815, "grad_norm": 1.4338313849048412, "learning_rate": 8.388299413739772e-06, "loss": 0.504, "step": 2930 }, { "epoch": 0.2852554744525547, "grad_norm": 1.6033282710660985, "learning_rate": 8.387140179497541e-06, "loss": 0.4686, "step": 2931 }, { "epoch": 0.285352798053528, "grad_norm": 1.4339139409278308, "learning_rate": 8.38598060867311e-06, "loss": 0.5885, "step": 2932 }, { "epoch": 0.28545012165450123, "grad_norm": 1.6962944035069916, "learning_rate": 8.384820701381705e-06, "loss": 0.6325, "step": 2933 }, { "epoch": 0.28554744525547443, "grad_norm": 1.2380931242026982, "learning_rate": 8.383660457738585e-06, "loss": 0.3528, "step": 2934 }, { "epoch": 0.2856447688564477, "grad_norm": 1.4958548492045998, "learning_rate": 8.382499877859046e-06, "loss": 0.5261, "step": 2935 }, { "epoch": 0.28574209245742094, "grad_norm": 1.2493863506860636, "learning_rate": 8.381338961858417e-06, "loss": 0.295, "step": 2936 }, { "epoch": 0.28583941605839414, "grad_norm": 1.0264542939220365, "learning_rate": 8.380177709852055e-06, "loss": 0.2736, "step": 2937 }, { "epoch": 0.2859367396593674, "grad_norm": 1.3694559515073481, "learning_rate": 8.379016121955358e-06, "loss": 0.2437, "step": 2938 }, { "epoch": 0.28603406326034064, "grad_norm": 1.3958652644514353, "learning_rate": 8.377854198283751e-06, "loss": 0.5162, "step": 2939 }, { "epoch": 0.28613138686131384, "grad_norm": 1.3188642877167738, "learning_rate": 8.376691938952694e-06, "loss": 0.4403, "step": 2940 }, { "epoch": 0.2862287104622871, "grad_norm": 1.5563883463328907, "learning_rate": 8.375529344077686e-06, "loss": 0.3871, "step": 2941 }, { "epoch": 0.28632603406326035, "grad_norm": 1.7106139691477682, "learning_rate": 8.37436641377425e-06, "loss": 0.5998, "step": 2942 }, { "epoch": 0.2864233576642336, "grad_norm": 1.8227768617334648, "learning_rate": 8.373203148157953e-06, "loss": 0.4192, "step": 2943 }, { "epoch": 0.2865206812652068, "grad_norm": 1.3645142496496503, "learning_rate": 8.372039547344383e-06, "loss": 0.4301, "step": 2944 }, { "epoch": 0.28661800486618005, "grad_norm": 1.4644520960794265, "learning_rate": 8.370875611449173e-06, "loss": 0.4333, "step": 2945 }, { "epoch": 0.2867153284671533, "grad_norm": 1.3686778637415178, "learning_rate": 8.369711340587981e-06, "loss": 0.4735, "step": 2946 }, { "epoch": 0.2868126520681265, "grad_norm": 1.7752150982830557, "learning_rate": 8.368546734876499e-06, "loss": 0.605, "step": 2947 }, { "epoch": 0.28690997566909976, "grad_norm": 1.6349896239905135, "learning_rate": 8.36738179443046e-06, "loss": 0.4521, "step": 2948 }, { "epoch": 0.287007299270073, "grad_norm": 1.7001103309282906, "learning_rate": 8.366216519365623e-06, "loss": 0.5243, "step": 2949 }, { "epoch": 0.2871046228710462, "grad_norm": 1.3288526449094853, "learning_rate": 8.365050909797779e-06, "loss": 0.4226, "step": 2950 }, { "epoch": 0.28720194647201946, "grad_norm": 1.0609308885865543, "learning_rate": 8.36388496584276e-06, "loss": 0.2761, "step": 2951 }, { "epoch": 0.2872992700729927, "grad_norm": 1.3048762567541314, "learning_rate": 8.362718687616422e-06, "loss": 0.3166, "step": 2952 }, { "epoch": 0.2873965936739659, "grad_norm": 1.5602591658770568, "learning_rate": 8.361552075234664e-06, "loss": 0.1814, "step": 2953 }, { "epoch": 0.28749391727493917, "grad_norm": 1.261612878851385, "learning_rate": 8.360385128813409e-06, "loss": 0.3431, "step": 2954 }, { "epoch": 0.2875912408759124, "grad_norm": 1.6502840086679433, "learning_rate": 8.359217848468617e-06, "loss": 0.5688, "step": 2955 }, { "epoch": 0.2876885644768856, "grad_norm": 1.1758618501430975, "learning_rate": 8.358050234316283e-06, "loss": 0.376, "step": 2956 }, { "epoch": 0.2877858880778589, "grad_norm": 1.3748216513361973, "learning_rate": 8.356882286472433e-06, "loss": 0.4893, "step": 2957 }, { "epoch": 0.28788321167883213, "grad_norm": 1.490557754247365, "learning_rate": 8.35571400505313e-06, "loss": 0.4322, "step": 2958 }, { "epoch": 0.2879805352798053, "grad_norm": 1.2474734521766377, "learning_rate": 8.35454539017446e-06, "loss": 0.249, "step": 2959 }, { "epoch": 0.2880778588807786, "grad_norm": 1.3041956082790018, "learning_rate": 8.353376441952554e-06, "loss": 0.3629, "step": 2960 }, { "epoch": 0.28817518248175183, "grad_norm": 1.1813542799359134, "learning_rate": 8.352207160503572e-06, "loss": 0.2541, "step": 2961 }, { "epoch": 0.2882725060827251, "grad_norm": 1.6196703441196314, "learning_rate": 8.351037545943702e-06, "loss": 0.5863, "step": 2962 }, { "epoch": 0.2883698296836983, "grad_norm": 1.6020435634219072, "learning_rate": 8.34986759838917e-06, "loss": 0.5539, "step": 2963 }, { "epoch": 0.28846715328467154, "grad_norm": 1.6170521555116952, "learning_rate": 8.348697317956238e-06, "loss": 0.4234, "step": 2964 }, { "epoch": 0.2885644768856448, "grad_norm": 1.2300623631368495, "learning_rate": 8.347526704761193e-06, "loss": 0.2784, "step": 2965 }, { "epoch": 0.288661800486618, "grad_norm": 2.179168092375873, "learning_rate": 8.346355758920364e-06, "loss": 0.4561, "step": 2966 }, { "epoch": 0.28875912408759125, "grad_norm": 1.5135423174141494, "learning_rate": 8.345184480550104e-06, "loss": 0.3807, "step": 2967 }, { "epoch": 0.2888564476885645, "grad_norm": 1.7005351963186346, "learning_rate": 8.344012869766808e-06, "loss": 0.538, "step": 2968 }, { "epoch": 0.2889537712895377, "grad_norm": 1.2789157911351394, "learning_rate": 8.342840926686898e-06, "loss": 0.2623, "step": 2969 }, { "epoch": 0.28905109489051095, "grad_norm": 1.304761873055631, "learning_rate": 8.34166865142683e-06, "loss": 0.4219, "step": 2970 }, { "epoch": 0.2891484184914842, "grad_norm": 1.6192760894025877, "learning_rate": 8.340496044103095e-06, "loss": 0.4378, "step": 2971 }, { "epoch": 0.2892457420924574, "grad_norm": 1.4363442626245757, "learning_rate": 8.339323104832214e-06, "loss": 0.3819, "step": 2972 }, { "epoch": 0.28934306569343066, "grad_norm": 1.5094300127764981, "learning_rate": 8.338149833730742e-06, "loss": 0.2769, "step": 2973 }, { "epoch": 0.2894403892944039, "grad_norm": 1.6047897202306092, "learning_rate": 8.33697623091527e-06, "loss": 0.424, "step": 2974 }, { "epoch": 0.2895377128953771, "grad_norm": 1.3129110600868221, "learning_rate": 8.33580229650242e-06, "loss": 0.5053, "step": 2975 }, { "epoch": 0.28963503649635036, "grad_norm": 1.1812562932245452, "learning_rate": 8.334628030608845e-06, "loss": 0.3835, "step": 2976 }, { "epoch": 0.2897323600973236, "grad_norm": 1.2211203388582414, "learning_rate": 8.333453433351233e-06, "loss": 0.3531, "step": 2977 }, { "epoch": 0.2898296836982968, "grad_norm": 1.4620903484748373, "learning_rate": 8.332278504846303e-06, "loss": 0.4771, "step": 2978 }, { "epoch": 0.28992700729927007, "grad_norm": 0.9704255718501243, "learning_rate": 8.331103245210812e-06, "loss": 0.2618, "step": 2979 }, { "epoch": 0.2900243309002433, "grad_norm": 1.2827724622455963, "learning_rate": 8.329927654561544e-06, "loss": 0.3052, "step": 2980 }, { "epoch": 0.2901216545012165, "grad_norm": 1.378581411338256, "learning_rate": 8.328751733015316e-06, "loss": 0.3568, "step": 2981 }, { "epoch": 0.2902189781021898, "grad_norm": 1.769807570821765, "learning_rate": 8.327575480688985e-06, "loss": 0.3102, "step": 2982 }, { "epoch": 0.290316301703163, "grad_norm": 1.4326301683333176, "learning_rate": 8.32639889769943e-06, "loss": 0.3218, "step": 2983 }, { "epoch": 0.2904136253041363, "grad_norm": 1.5418816322088151, "learning_rate": 8.325221984163575e-06, "loss": 0.3257, "step": 2984 }, { "epoch": 0.2905109489051095, "grad_norm": 1.573484642436306, "learning_rate": 8.324044740198366e-06, "loss": 0.5401, "step": 2985 }, { "epoch": 0.29060827250608273, "grad_norm": 1.2270555416429247, "learning_rate": 8.322867165920789e-06, "loss": 0.3914, "step": 2986 }, { "epoch": 0.290705596107056, "grad_norm": 1.1838846887742434, "learning_rate": 8.321689261447858e-06, "loss": 0.3282, "step": 2987 }, { "epoch": 0.2908029197080292, "grad_norm": 1.5077214188811954, "learning_rate": 8.320511026896624e-06, "loss": 0.5279, "step": 2988 }, { "epoch": 0.29090024330900244, "grad_norm": 1.1784061774291985, "learning_rate": 8.31933246238417e-06, "loss": 0.403, "step": 2989 }, { "epoch": 0.2909975669099757, "grad_norm": 1.2176703537151474, "learning_rate": 8.318153568027607e-06, "loss": 0.4213, "step": 2990 }, { "epoch": 0.2910948905109489, "grad_norm": 1.3475262123063816, "learning_rate": 8.316974343944085e-06, "loss": 0.4059, "step": 2991 }, { "epoch": 0.29119221411192214, "grad_norm": 1.2398233047847593, "learning_rate": 8.315794790250784e-06, "loss": 0.2626, "step": 2992 }, { "epoch": 0.2912895377128954, "grad_norm": 1.3862498175549538, "learning_rate": 8.314614907064915e-06, "loss": 0.4535, "step": 2993 }, { "epoch": 0.2913868613138686, "grad_norm": 1.455622096437578, "learning_rate": 8.313434694503727e-06, "loss": 0.4067, "step": 2994 }, { "epoch": 0.29148418491484185, "grad_norm": 1.4755183973829757, "learning_rate": 8.312254152684496e-06, "loss": 0.6493, "step": 2995 }, { "epoch": 0.2915815085158151, "grad_norm": 1.0399713771806027, "learning_rate": 8.311073281724536e-06, "loss": 0.3051, "step": 2996 }, { "epoch": 0.2916788321167883, "grad_norm": 1.3151300509583979, "learning_rate": 8.309892081741186e-06, "loss": 0.3982, "step": 2997 }, { "epoch": 0.29177615571776155, "grad_norm": 1.376541833798208, "learning_rate": 8.308710552851826e-06, "loss": 0.4749, "step": 2998 }, { "epoch": 0.2918734793187348, "grad_norm": 1.2551786912554768, "learning_rate": 8.307528695173865e-06, "loss": 0.3118, "step": 2999 }, { "epoch": 0.291970802919708, "grad_norm": 2.1707038191553463, "learning_rate": 8.306346508824746e-06, "loss": 0.3438, "step": 3000 }, { "epoch": 0.29206812652068126, "grad_norm": 1.4299459588569998, "learning_rate": 8.30516399392194e-06, "loss": 0.4838, "step": 3001 }, { "epoch": 0.2921654501216545, "grad_norm": 1.378341342959643, "learning_rate": 8.303981150582958e-06, "loss": 0.5055, "step": 3002 }, { "epoch": 0.2922627737226277, "grad_norm": 1.4826508798742193, "learning_rate": 8.302797978925338e-06, "loss": 0.3737, "step": 3003 }, { "epoch": 0.29236009732360096, "grad_norm": 1.222513403789782, "learning_rate": 8.301614479066653e-06, "loss": 0.4587, "step": 3004 }, { "epoch": 0.2924574209245742, "grad_norm": 1.3819233250029228, "learning_rate": 8.300430651124508e-06, "loss": 0.4021, "step": 3005 }, { "epoch": 0.29255474452554747, "grad_norm": 1.2846536784172882, "learning_rate": 8.29924649521654e-06, "loss": 0.3609, "step": 3006 }, { "epoch": 0.29265206812652067, "grad_norm": 1.4274226525457885, "learning_rate": 8.298062011460419e-06, "loss": 0.5267, "step": 3007 }, { "epoch": 0.2927493917274939, "grad_norm": 1.4642655922839627, "learning_rate": 8.296877199973849e-06, "loss": 0.3499, "step": 3008 }, { "epoch": 0.2928467153284672, "grad_norm": 1.4317302181421974, "learning_rate": 8.295692060874568e-06, "loss": 0.4979, "step": 3009 }, { "epoch": 0.2929440389294404, "grad_norm": 1.3191877461185262, "learning_rate": 8.294506594280338e-06, "loss": 0.2835, "step": 3010 }, { "epoch": 0.29304136253041363, "grad_norm": 1.0943861065294986, "learning_rate": 8.293320800308964e-06, "loss": 0.2138, "step": 3011 }, { "epoch": 0.2931386861313869, "grad_norm": 1.2621219805575281, "learning_rate": 8.292134679078277e-06, "loss": 0.3027, "step": 3012 }, { "epoch": 0.2932360097323601, "grad_norm": 1.556172337566686, "learning_rate": 8.290948230706145e-06, "loss": 0.4462, "step": 3013 }, { "epoch": 0.29333333333333333, "grad_norm": 1.3363658374504028, "learning_rate": 8.289761455310463e-06, "loss": 0.373, "step": 3014 }, { "epoch": 0.2934306569343066, "grad_norm": 1.4458593210455408, "learning_rate": 8.288574353009164e-06, "loss": 0.5566, "step": 3015 }, { "epoch": 0.2935279805352798, "grad_norm": 1.5034274044899172, "learning_rate": 8.287386923920211e-06, "loss": 0.3837, "step": 3016 }, { "epoch": 0.29362530413625304, "grad_norm": 1.484769748600726, "learning_rate": 8.286199168161598e-06, "loss": 0.3173, "step": 3017 }, { "epoch": 0.2937226277372263, "grad_norm": 1.4336064725306121, "learning_rate": 8.285011085851353e-06, "loss": 0.4005, "step": 3018 }, { "epoch": 0.2938199513381995, "grad_norm": 1.3857231757141482, "learning_rate": 8.283822677107539e-06, "loss": 0.481, "step": 3019 }, { "epoch": 0.29391727493917275, "grad_norm": 1.4086307294395457, "learning_rate": 8.282633942048244e-06, "loss": 0.4181, "step": 3020 }, { "epoch": 0.294014598540146, "grad_norm": 1.4701075671537391, "learning_rate": 8.2814448807916e-06, "loss": 0.4041, "step": 3021 }, { "epoch": 0.2941119221411192, "grad_norm": 1.5925621393078395, "learning_rate": 8.28025549345576e-06, "loss": 0.3062, "step": 3022 }, { "epoch": 0.29420924574209245, "grad_norm": 1.6058911141553376, "learning_rate": 8.279065780158914e-06, "loss": 0.5534, "step": 3023 }, { "epoch": 0.2943065693430657, "grad_norm": 1.4134575830281486, "learning_rate": 8.277875741019289e-06, "loss": 0.5017, "step": 3024 }, { "epoch": 0.2944038929440389, "grad_norm": 1.6163740830610969, "learning_rate": 8.276685376155133e-06, "loss": 0.5513, "step": 3025 }, { "epoch": 0.29450121654501216, "grad_norm": 1.3415920762045879, "learning_rate": 8.275494685684739e-06, "loss": 0.4209, "step": 3026 }, { "epoch": 0.2945985401459854, "grad_norm": 1.699522776097275, "learning_rate": 8.274303669726427e-06, "loss": 0.2444, "step": 3027 }, { "epoch": 0.29469586374695866, "grad_norm": 1.3118143561432465, "learning_rate": 8.273112328398545e-06, "loss": 0.3282, "step": 3028 }, { "epoch": 0.29479318734793186, "grad_norm": 1.3608335365502384, "learning_rate": 8.271920661819479e-06, "loss": 0.4625, "step": 3029 }, { "epoch": 0.2948905109489051, "grad_norm": 1.320965035708582, "learning_rate": 8.270728670107645e-06, "loss": 0.4161, "step": 3030 }, { "epoch": 0.29498783454987837, "grad_norm": 1.2315684415049128, "learning_rate": 8.269536353381493e-06, "loss": 0.3264, "step": 3031 }, { "epoch": 0.29508515815085157, "grad_norm": 1.2397754210481065, "learning_rate": 8.268343711759505e-06, "loss": 0.3184, "step": 3032 }, { "epoch": 0.2951824817518248, "grad_norm": 1.4717261820272485, "learning_rate": 8.267150745360194e-06, "loss": 0.381, "step": 3033 }, { "epoch": 0.2952798053527981, "grad_norm": 1.7364842416546407, "learning_rate": 8.265957454302102e-06, "loss": 0.3639, "step": 3034 }, { "epoch": 0.2953771289537713, "grad_norm": 1.6249980192905973, "learning_rate": 8.264763838703813e-06, "loss": 0.5112, "step": 3035 }, { "epoch": 0.2954744525547445, "grad_norm": 1.682249094263979, "learning_rate": 8.263569898683934e-06, "loss": 0.4894, "step": 3036 }, { "epoch": 0.2955717761557178, "grad_norm": 1.9200248186176307, "learning_rate": 8.262375634361108e-06, "loss": 0.529, "step": 3037 }, { "epoch": 0.295669099756691, "grad_norm": 1.4426650259998133, "learning_rate": 8.261181045854011e-06, "loss": 0.5037, "step": 3038 }, { "epoch": 0.29576642335766423, "grad_norm": 1.6904227765149746, "learning_rate": 8.259986133281348e-06, "loss": 0.3632, "step": 3039 }, { "epoch": 0.2958637469586375, "grad_norm": 1.3863799205056755, "learning_rate": 8.25879089676186e-06, "loss": 0.4148, "step": 3040 }, { "epoch": 0.2959610705596107, "grad_norm": 1.627436205526306, "learning_rate": 8.257595336414317e-06, "loss": 0.4558, "step": 3041 }, { "epoch": 0.29605839416058394, "grad_norm": 1.3163567598814478, "learning_rate": 8.256399452357524e-06, "loss": 0.2713, "step": 3042 }, { "epoch": 0.2961557177615572, "grad_norm": 1.6072179171276018, "learning_rate": 8.255203244710316e-06, "loss": 0.353, "step": 3043 }, { "epoch": 0.2962530413625304, "grad_norm": 1.4217719575203627, "learning_rate": 8.254006713591559e-06, "loss": 0.3744, "step": 3044 }, { "epoch": 0.29635036496350364, "grad_norm": 1.9013012922141048, "learning_rate": 8.252809859120154e-06, "loss": 0.209, "step": 3045 }, { "epoch": 0.2964476885644769, "grad_norm": 1.390657831725977, "learning_rate": 8.251612681415035e-06, "loss": 0.3722, "step": 3046 }, { "epoch": 0.2965450121654501, "grad_norm": 1.4478686848472833, "learning_rate": 8.250415180595167e-06, "loss": 0.3869, "step": 3047 }, { "epoch": 0.29664233576642335, "grad_norm": 1.1443911522017596, "learning_rate": 8.249217356779544e-06, "loss": 0.3385, "step": 3048 }, { "epoch": 0.2967396593673966, "grad_norm": 1.7245119786652503, "learning_rate": 8.248019210087195e-06, "loss": 0.3023, "step": 3049 }, { "epoch": 0.29683698296836986, "grad_norm": 1.8030337728763741, "learning_rate": 8.24682074063718e-06, "loss": 0.3784, "step": 3050 }, { "epoch": 0.29693430656934305, "grad_norm": 1.299417141317702, "learning_rate": 8.245621948548593e-06, "loss": 0.2963, "step": 3051 }, { "epoch": 0.2970316301703163, "grad_norm": 1.3334468356141627, "learning_rate": 8.244422833940558e-06, "loss": 0.3671, "step": 3052 }, { "epoch": 0.29712895377128956, "grad_norm": 1.6168488226188178, "learning_rate": 8.24322339693223e-06, "loss": 0.5497, "step": 3053 }, { "epoch": 0.29722627737226276, "grad_norm": 1.49700230831562, "learning_rate": 8.242023637642802e-06, "loss": 0.4567, "step": 3054 }, { "epoch": 0.297323600973236, "grad_norm": 1.0494586888942983, "learning_rate": 8.24082355619149e-06, "loss": 0.2186, "step": 3055 }, { "epoch": 0.29742092457420927, "grad_norm": 1.372792205417397, "learning_rate": 8.239623152697553e-06, "loss": 0.5083, "step": 3056 }, { "epoch": 0.29751824817518246, "grad_norm": 1.266230497219453, "learning_rate": 8.238422427280269e-06, "loss": 0.461, "step": 3057 }, { "epoch": 0.2976155717761557, "grad_norm": 1.5041389582539588, "learning_rate": 8.237221380058959e-06, "loss": 0.3813, "step": 3058 }, { "epoch": 0.29771289537712897, "grad_norm": 1.4593593621079823, "learning_rate": 8.23602001115297e-06, "loss": 0.473, "step": 3059 }, { "epoch": 0.29781021897810217, "grad_norm": 1.3666083716931219, "learning_rate": 8.234818320681685e-06, "loss": 0.4822, "step": 3060 }, { "epoch": 0.2979075425790754, "grad_norm": 1.407870228183954, "learning_rate": 8.233616308764513e-06, "loss": 0.4012, "step": 3061 }, { "epoch": 0.2980048661800487, "grad_norm": 1.4404350668596586, "learning_rate": 8.232413975520903e-06, "loss": 0.5057, "step": 3062 }, { "epoch": 0.2981021897810219, "grad_norm": 1.3912456713229528, "learning_rate": 8.231211321070329e-06, "loss": 0.4578, "step": 3063 }, { "epoch": 0.29819951338199513, "grad_norm": 1.3191795228165797, "learning_rate": 8.2300083455323e-06, "loss": 0.3888, "step": 3064 }, { "epoch": 0.2982968369829684, "grad_norm": 1.4258248936492355, "learning_rate": 8.228805049026355e-06, "loss": 0.5108, "step": 3065 }, { "epoch": 0.2983941605839416, "grad_norm": 1.4850835614825084, "learning_rate": 8.22760143167207e-06, "loss": 0.5968, "step": 3066 }, { "epoch": 0.29849148418491483, "grad_norm": 1.2696050534436827, "learning_rate": 8.226397493589044e-06, "loss": 0.3328, "step": 3067 }, { "epoch": 0.2985888077858881, "grad_norm": 1.1993181516723008, "learning_rate": 8.225193234896918e-06, "loss": 0.2682, "step": 3068 }, { "epoch": 0.2986861313868613, "grad_norm": 1.3420953543565923, "learning_rate": 8.223988655715355e-06, "loss": 0.3865, "step": 3069 }, { "epoch": 0.29878345498783454, "grad_norm": 1.305913976862295, "learning_rate": 8.222783756164061e-06, "loss": 0.3551, "step": 3070 }, { "epoch": 0.2988807785888078, "grad_norm": 1.3385899852932626, "learning_rate": 8.221578536362764e-06, "loss": 0.4203, "step": 3071 }, { "epoch": 0.29897810218978105, "grad_norm": 1.189534251886867, "learning_rate": 8.220372996431228e-06, "loss": 0.2937, "step": 3072 }, { "epoch": 0.29907542579075425, "grad_norm": 1.5982329206910104, "learning_rate": 8.219167136489245e-06, "loss": 0.6064, "step": 3073 }, { "epoch": 0.2991727493917275, "grad_norm": 1.775024980718492, "learning_rate": 8.217960956656648e-06, "loss": 0.5517, "step": 3074 }, { "epoch": 0.29927007299270075, "grad_norm": 1.4818012612095348, "learning_rate": 8.216754457053291e-06, "loss": 0.3574, "step": 3075 }, { "epoch": 0.29936739659367395, "grad_norm": 1.5621403089409462, "learning_rate": 8.215547637799068e-06, "loss": 0.4108, "step": 3076 }, { "epoch": 0.2994647201946472, "grad_norm": 1.4983847186167278, "learning_rate": 8.214340499013899e-06, "loss": 0.4644, "step": 3077 }, { "epoch": 0.29956204379562046, "grad_norm": 1.5897848132407382, "learning_rate": 8.213133040817738e-06, "loss": 0.4894, "step": 3078 }, { "epoch": 0.29965936739659366, "grad_norm": 1.6354640621760643, "learning_rate": 8.211925263330573e-06, "loss": 0.4583, "step": 3079 }, { "epoch": 0.2997566909975669, "grad_norm": 1.4952024987397354, "learning_rate": 8.21071716667242e-06, "loss": 0.5976, "step": 3080 }, { "epoch": 0.29985401459854016, "grad_norm": 1.0095340308225043, "learning_rate": 8.20950875096333e-06, "loss": 0.2524, "step": 3081 }, { "epoch": 0.29995133819951336, "grad_norm": 1.4197678935056404, "learning_rate": 8.208300016323381e-06, "loss": 0.5514, "step": 3082 }, { "epoch": 0.3000486618004866, "grad_norm": 1.249287306745543, "learning_rate": 8.207090962872688e-06, "loss": 0.2683, "step": 3083 }, { "epoch": 0.30014598540145987, "grad_norm": 1.2420194980085992, "learning_rate": 8.205881590731394e-06, "loss": 0.3941, "step": 3084 }, { "epoch": 0.30024330900243307, "grad_norm": 1.0228818593574307, "learning_rate": 8.204671900019676e-06, "loss": 0.2158, "step": 3085 }, { "epoch": 0.3003406326034063, "grad_norm": 1.4988207950368069, "learning_rate": 8.203461890857743e-06, "loss": 0.4833, "step": 3086 }, { "epoch": 0.3004379562043796, "grad_norm": 1.3402746636459373, "learning_rate": 8.20225156336583e-06, "loss": 0.437, "step": 3087 }, { "epoch": 0.3005352798053528, "grad_norm": 1.3071666622302105, "learning_rate": 8.201040917664214e-06, "loss": 0.3667, "step": 3088 }, { "epoch": 0.300632603406326, "grad_norm": 2.001934665501785, "learning_rate": 8.199829953873192e-06, "loss": 0.346, "step": 3089 }, { "epoch": 0.3007299270072993, "grad_norm": 1.50451394225963, "learning_rate": 8.198618672113104e-06, "loss": 0.4897, "step": 3090 }, { "epoch": 0.3008272506082725, "grad_norm": 1.5127622960173581, "learning_rate": 8.197407072504309e-06, "loss": 0.4301, "step": 3091 }, { "epoch": 0.30092457420924573, "grad_norm": 1.409495275402236, "learning_rate": 8.196195155167211e-06, "loss": 0.4954, "step": 3092 }, { "epoch": 0.301021897810219, "grad_norm": 1.3458224438835962, "learning_rate": 8.194982920222233e-06, "loss": 0.5023, "step": 3093 }, { "epoch": 0.30111922141119224, "grad_norm": 1.484836707336815, "learning_rate": 8.19377036778984e-06, "loss": 0.4471, "step": 3094 }, { "epoch": 0.30121654501216544, "grad_norm": 1.4314600061658445, "learning_rate": 8.192557497990522e-06, "loss": 0.4519, "step": 3095 }, { "epoch": 0.3013138686131387, "grad_norm": 1.228152077257465, "learning_rate": 8.191344310944803e-06, "loss": 0.2338, "step": 3096 }, { "epoch": 0.30141119221411194, "grad_norm": 1.4025619039626473, "learning_rate": 8.19013080677324e-06, "loss": 0.3748, "step": 3097 }, { "epoch": 0.30150851581508514, "grad_norm": 1.535338102251852, "learning_rate": 8.188916985596415e-06, "loss": 0.3129, "step": 3098 }, { "epoch": 0.3016058394160584, "grad_norm": 1.7024230210298346, "learning_rate": 8.187702847534952e-06, "loss": 0.5525, "step": 3099 }, { "epoch": 0.30170316301703165, "grad_norm": 1.4950690283515784, "learning_rate": 8.186488392709495e-06, "loss": 0.5258, "step": 3100 }, { "epoch": 0.30180048661800485, "grad_norm": 1.589216178732189, "learning_rate": 8.18527362124073e-06, "loss": 0.5745, "step": 3101 }, { "epoch": 0.3018978102189781, "grad_norm": 1.5942675928105552, "learning_rate": 8.184058533249367e-06, "loss": 0.6344, "step": 3102 }, { "epoch": 0.30199513381995136, "grad_norm": 1.3981131065521017, "learning_rate": 8.18284312885615e-06, "loss": 0.3369, "step": 3103 }, { "epoch": 0.30209245742092455, "grad_norm": 1.6180199585993311, "learning_rate": 8.181627408181854e-06, "loss": 0.4014, "step": 3104 }, { "epoch": 0.3021897810218978, "grad_norm": 1.6338683004824879, "learning_rate": 8.180411371347288e-06, "loss": 0.4983, "step": 3105 }, { "epoch": 0.30228710462287106, "grad_norm": 1.5225224020676915, "learning_rate": 8.17919501847329e-06, "loss": 0.5016, "step": 3106 }, { "epoch": 0.30238442822384426, "grad_norm": 1.23190340238718, "learning_rate": 8.177978349680727e-06, "loss": 0.3644, "step": 3107 }, { "epoch": 0.3024817518248175, "grad_norm": 1.4496645177592962, "learning_rate": 8.176761365090503e-06, "loss": 0.526, "step": 3108 }, { "epoch": 0.30257907542579077, "grad_norm": 1.5209859048615393, "learning_rate": 8.17554406482355e-06, "loss": 0.3034, "step": 3109 }, { "epoch": 0.30267639902676396, "grad_norm": 1.4404359772108442, "learning_rate": 8.17432644900083e-06, "loss": 0.4735, "step": 3110 }, { "epoch": 0.3027737226277372, "grad_norm": 1.2693525922216498, "learning_rate": 8.173108517743343e-06, "loss": 0.4021, "step": 3111 }, { "epoch": 0.30287104622871047, "grad_norm": 1.3995736051817393, "learning_rate": 8.171890271172109e-06, "loss": 0.3084, "step": 3112 }, { "epoch": 0.3029683698296837, "grad_norm": 1.5690384436250255, "learning_rate": 8.17067170940819e-06, "loss": 0.4097, "step": 3113 }, { "epoch": 0.3030656934306569, "grad_norm": 1.270566641334736, "learning_rate": 8.169452832572676e-06, "loss": 0.3813, "step": 3114 }, { "epoch": 0.3031630170316302, "grad_norm": 1.1690990375599999, "learning_rate": 8.168233640786682e-06, "loss": 0.2898, "step": 3115 }, { "epoch": 0.30326034063260343, "grad_norm": 1.5367454476066444, "learning_rate": 8.167014134171367e-06, "loss": 0.4167, "step": 3116 }, { "epoch": 0.30335766423357663, "grad_norm": 1.113322849500334, "learning_rate": 8.165794312847912e-06, "loss": 0.3274, "step": 3117 }, { "epoch": 0.3034549878345499, "grad_norm": 1.4711999953076527, "learning_rate": 8.164574176937527e-06, "loss": 0.368, "step": 3118 }, { "epoch": 0.30355231143552314, "grad_norm": 1.4465621082621003, "learning_rate": 8.163353726561462e-06, "loss": 0.2719, "step": 3119 }, { "epoch": 0.30364963503649633, "grad_norm": 1.5224694722189016, "learning_rate": 8.162132961840994e-06, "loss": 0.3296, "step": 3120 }, { "epoch": 0.3037469586374696, "grad_norm": 1.3713377819635104, "learning_rate": 8.160911882897429e-06, "loss": 0.3064, "step": 3121 }, { "epoch": 0.30384428223844284, "grad_norm": 1.6461819951429466, "learning_rate": 8.159690489852108e-06, "loss": 0.3646, "step": 3122 }, { "epoch": 0.30394160583941604, "grad_norm": 1.4328493269552467, "learning_rate": 8.1584687828264e-06, "loss": 0.4363, "step": 3123 }, { "epoch": 0.3040389294403893, "grad_norm": 1.2811535124384867, "learning_rate": 8.157246761941708e-06, "loss": 0.4582, "step": 3124 }, { "epoch": 0.30413625304136255, "grad_norm": 1.2144846492785035, "learning_rate": 8.156024427319464e-06, "loss": 0.2413, "step": 3125 }, { "epoch": 0.30423357664233575, "grad_norm": 1.4112474441167293, "learning_rate": 8.154801779081135e-06, "loss": 0.4762, "step": 3126 }, { "epoch": 0.304330900243309, "grad_norm": 1.4641495751020401, "learning_rate": 8.153578817348213e-06, "loss": 0.4905, "step": 3127 }, { "epoch": 0.30442822384428225, "grad_norm": 1.7041758523831827, "learning_rate": 8.152355542242226e-06, "loss": 0.5396, "step": 3128 }, { "epoch": 0.30452554744525545, "grad_norm": 1.2349793608481423, "learning_rate": 8.151131953884728e-06, "loss": 0.3847, "step": 3129 }, { "epoch": 0.3046228710462287, "grad_norm": 1.4859954822671841, "learning_rate": 8.149908052397314e-06, "loss": 0.5907, "step": 3130 }, { "epoch": 0.30472019464720196, "grad_norm": 1.2693166698162108, "learning_rate": 8.148683837901599e-06, "loss": 0.2636, "step": 3131 }, { "epoch": 0.30481751824817516, "grad_norm": 1.1084842598244526, "learning_rate": 8.147459310519238e-06, "loss": 0.3103, "step": 3132 }, { "epoch": 0.3049148418491484, "grad_norm": 1.2151450124002459, "learning_rate": 8.146234470371908e-06, "loss": 0.2734, "step": 3133 }, { "epoch": 0.30501216545012166, "grad_norm": 1.3625938628176788, "learning_rate": 8.145009317581328e-06, "loss": 0.3757, "step": 3134 }, { "epoch": 0.3051094890510949, "grad_norm": 1.3367991206467007, "learning_rate": 8.143783852269239e-06, "loss": 0.3469, "step": 3135 }, { "epoch": 0.3052068126520681, "grad_norm": 1.5485997458565015, "learning_rate": 8.142558074557413e-06, "loss": 0.6068, "step": 3136 }, { "epoch": 0.30530413625304137, "grad_norm": 1.4327175362669387, "learning_rate": 8.141331984567661e-06, "loss": 0.4495, "step": 3137 }, { "epoch": 0.3054014598540146, "grad_norm": 1.4648525390361329, "learning_rate": 8.140105582421819e-06, "loss": 0.4855, "step": 3138 }, { "epoch": 0.3054987834549878, "grad_norm": 1.147269868566856, "learning_rate": 8.138878868241755e-06, "loss": 0.3671, "step": 3139 }, { "epoch": 0.3055961070559611, "grad_norm": 1.4274559741070532, "learning_rate": 8.13765184214937e-06, "loss": 0.4246, "step": 3140 }, { "epoch": 0.30569343065693433, "grad_norm": 1.3864373579762495, "learning_rate": 8.13642450426659e-06, "loss": 0.5252, "step": 3141 }, { "epoch": 0.3057907542579075, "grad_norm": 1.3508219371380046, "learning_rate": 8.135196854715382e-06, "loss": 0.4022, "step": 3142 }, { "epoch": 0.3058880778588808, "grad_norm": 1.3974501891292628, "learning_rate": 8.133968893617734e-06, "loss": 0.4903, "step": 3143 }, { "epoch": 0.30598540145985403, "grad_norm": 1.379672607479744, "learning_rate": 8.132740621095672e-06, "loss": 0.4389, "step": 3144 }, { "epoch": 0.30608272506082723, "grad_norm": 1.5338640282858476, "learning_rate": 8.131512037271248e-06, "loss": 0.5719, "step": 3145 }, { "epoch": 0.3061800486618005, "grad_norm": 1.754315640729903, "learning_rate": 8.130283142266549e-06, "loss": 0.4684, "step": 3146 }, { "epoch": 0.30627737226277374, "grad_norm": 1.3260485404955915, "learning_rate": 8.129053936203688e-06, "loss": 0.3967, "step": 3147 }, { "epoch": 0.30637469586374694, "grad_norm": 1.3926987348333701, "learning_rate": 8.127824419204818e-06, "loss": 0.3916, "step": 3148 }, { "epoch": 0.3064720194647202, "grad_norm": 1.4794110467900325, "learning_rate": 8.126594591392108e-06, "loss": 0.4127, "step": 3149 }, { "epoch": 0.30656934306569344, "grad_norm": 1.5256531082933278, "learning_rate": 8.125364452887775e-06, "loss": 0.4219, "step": 3150 }, { "epoch": 0.30666666666666664, "grad_norm": 1.2765687697220431, "learning_rate": 8.124134003814054e-06, "loss": 0.3482, "step": 3151 }, { "epoch": 0.3067639902676399, "grad_norm": 1.345009147300955, "learning_rate": 8.122903244293217e-06, "loss": 0.2419, "step": 3152 }, { "epoch": 0.30686131386861315, "grad_norm": 1.243655794496096, "learning_rate": 8.121672174447566e-06, "loss": 0.3132, "step": 3153 }, { "epoch": 0.30695863746958635, "grad_norm": 1.4526276096090445, "learning_rate": 8.120440794399432e-06, "loss": 0.5369, "step": 3154 }, { "epoch": 0.3070559610705596, "grad_norm": 1.4088986355103132, "learning_rate": 8.119209104271177e-06, "loss": 0.331, "step": 3155 }, { "epoch": 0.30715328467153286, "grad_norm": 1.4915113265208189, "learning_rate": 8.117977104185198e-06, "loss": 0.6195, "step": 3156 }, { "epoch": 0.3072506082725061, "grad_norm": 1.143472956889321, "learning_rate": 8.116744794263916e-06, "loss": 0.2632, "step": 3157 }, { "epoch": 0.3073479318734793, "grad_norm": 1.3575606240914238, "learning_rate": 8.11551217462979e-06, "loss": 0.3927, "step": 3158 }, { "epoch": 0.30744525547445256, "grad_norm": 1.2891794787417357, "learning_rate": 8.114279245405301e-06, "loss": 0.3766, "step": 3159 }, { "epoch": 0.3075425790754258, "grad_norm": 1.1671813045475161, "learning_rate": 8.113046006712973e-06, "loss": 0.3527, "step": 3160 }, { "epoch": 0.307639902676399, "grad_norm": 1.2346428689153102, "learning_rate": 8.111812458675348e-06, "loss": 0.456, "step": 3161 }, { "epoch": 0.30773722627737227, "grad_norm": 1.543619139526521, "learning_rate": 8.110578601415007e-06, "loss": 0.419, "step": 3162 }, { "epoch": 0.3078345498783455, "grad_norm": 1.5361457722305751, "learning_rate": 8.109344435054557e-06, "loss": 0.4477, "step": 3163 }, { "epoch": 0.3079318734793187, "grad_norm": 1.0948111699644958, "learning_rate": 8.108109959716641e-06, "loss": 0.3469, "step": 3164 }, { "epoch": 0.30802919708029197, "grad_norm": 1.4900262169803653, "learning_rate": 8.106875175523928e-06, "loss": 0.5066, "step": 3165 }, { "epoch": 0.3081265206812652, "grad_norm": 1.2143378476964652, "learning_rate": 8.105640082599118e-06, "loss": 0.4016, "step": 3166 }, { "epoch": 0.3082238442822384, "grad_norm": 1.227404068812886, "learning_rate": 8.104404681064943e-06, "loss": 0.3408, "step": 3167 }, { "epoch": 0.3083211678832117, "grad_norm": 1.273486832675327, "learning_rate": 8.10316897104417e-06, "loss": 0.3819, "step": 3168 }, { "epoch": 0.30841849148418493, "grad_norm": 1.390509439874599, "learning_rate": 8.101932952659586e-06, "loss": 0.5108, "step": 3169 }, { "epoch": 0.30851581508515813, "grad_norm": 1.1910701089910116, "learning_rate": 8.100696626034019e-06, "loss": 0.3579, "step": 3170 }, { "epoch": 0.3086131386861314, "grad_norm": 1.6328619260471173, "learning_rate": 8.099459991290324e-06, "loss": 0.666, "step": 3171 }, { "epoch": 0.30871046228710464, "grad_norm": 1.4085180007277236, "learning_rate": 8.09822304855138e-06, "loss": 0.3684, "step": 3172 }, { "epoch": 0.30880778588807783, "grad_norm": 1.5920530522626664, "learning_rate": 8.096985797940111e-06, "loss": 0.4499, "step": 3173 }, { "epoch": 0.3089051094890511, "grad_norm": 1.2895313219583247, "learning_rate": 8.09574823957946e-06, "loss": 0.4939, "step": 3174 }, { "epoch": 0.30900243309002434, "grad_norm": 1.5242111980147517, "learning_rate": 8.094510373592403e-06, "loss": 0.3223, "step": 3175 }, { "epoch": 0.30909975669099754, "grad_norm": 1.3628460645839475, "learning_rate": 8.093272200101946e-06, "loss": 0.507, "step": 3176 }, { "epoch": 0.3091970802919708, "grad_norm": 1.45716247785806, "learning_rate": 8.092033719231134e-06, "loss": 0.2011, "step": 3177 }, { "epoch": 0.30929440389294405, "grad_norm": 1.0824220688323085, "learning_rate": 8.090794931103026e-06, "loss": 0.2127, "step": 3178 }, { "epoch": 0.3093917274939173, "grad_norm": 1.3637285345889945, "learning_rate": 8.089555835840728e-06, "loss": 0.3567, "step": 3179 }, { "epoch": 0.3094890510948905, "grad_norm": 1.352681485055594, "learning_rate": 8.088316433567369e-06, "loss": 0.4403, "step": 3180 }, { "epoch": 0.30958637469586375, "grad_norm": 1.5330943463849844, "learning_rate": 8.087076724406106e-06, "loss": 0.3379, "step": 3181 }, { "epoch": 0.309683698296837, "grad_norm": 1.4102464472701088, "learning_rate": 8.08583670848013e-06, "loss": 0.5173, "step": 3182 }, { "epoch": 0.3097810218978102, "grad_norm": 1.3268465957799758, "learning_rate": 8.084596385912666e-06, "loss": 0.2684, "step": 3183 }, { "epoch": 0.30987834549878346, "grad_norm": 1.0612856713580272, "learning_rate": 8.083355756826962e-06, "loss": 0.2057, "step": 3184 }, { "epoch": 0.3099756690997567, "grad_norm": 1.2705392445129486, "learning_rate": 8.082114821346302e-06, "loss": 0.4234, "step": 3185 }, { "epoch": 0.3100729927007299, "grad_norm": 1.502946661443704, "learning_rate": 8.080873579593997e-06, "loss": 0.5134, "step": 3186 }, { "epoch": 0.31017031630170316, "grad_norm": 1.4788874331253061, "learning_rate": 8.079632031693392e-06, "loss": 0.6157, "step": 3187 }, { "epoch": 0.3102676399026764, "grad_norm": 1.0642136610663042, "learning_rate": 8.078390177767858e-06, "loss": 0.2667, "step": 3188 }, { "epoch": 0.3103649635036496, "grad_norm": 1.2694637339318475, "learning_rate": 8.0771480179408e-06, "loss": 0.4189, "step": 3189 }, { "epoch": 0.31046228710462287, "grad_norm": 1.40127369638598, "learning_rate": 8.075905552335652e-06, "loss": 0.6007, "step": 3190 }, { "epoch": 0.3105596107055961, "grad_norm": 1.4654952978073685, "learning_rate": 8.07466278107588e-06, "loss": 0.524, "step": 3191 }, { "epoch": 0.3106569343065693, "grad_norm": 1.468706008576981, "learning_rate": 8.073419704284977e-06, "loss": 0.5511, "step": 3192 }, { "epoch": 0.3107542579075426, "grad_norm": 1.3171427220237197, "learning_rate": 8.072176322086468e-06, "loss": 0.4903, "step": 3193 }, { "epoch": 0.31085158150851583, "grad_norm": 1.5864389312753313, "learning_rate": 8.07093263460391e-06, "loss": 0.7036, "step": 3194 }, { "epoch": 0.310948905109489, "grad_norm": 1.3892836370042843, "learning_rate": 8.06968864196089e-06, "loss": 0.4277, "step": 3195 }, { "epoch": 0.3110462287104623, "grad_norm": 1.1676123850477602, "learning_rate": 8.06844434428102e-06, "loss": 0.2693, "step": 3196 }, { "epoch": 0.31114355231143553, "grad_norm": 1.506360397408636, "learning_rate": 8.067199741687951e-06, "loss": 0.4425, "step": 3197 }, { "epoch": 0.31124087591240873, "grad_norm": 1.9902872035508865, "learning_rate": 8.065954834305359e-06, "loss": 0.4464, "step": 3198 }, { "epoch": 0.311338199513382, "grad_norm": 1.3738283887856941, "learning_rate": 8.06470962225695e-06, "loss": 0.2504, "step": 3199 }, { "epoch": 0.31143552311435524, "grad_norm": 1.5173657046475728, "learning_rate": 8.063464105666462e-06, "loss": 0.4145, "step": 3200 }, { "epoch": 0.3115328467153285, "grad_norm": 1.5348915862029098, "learning_rate": 8.062218284657663e-06, "loss": 0.4182, "step": 3201 }, { "epoch": 0.3116301703163017, "grad_norm": 1.6415893626851852, "learning_rate": 8.06097215935435e-06, "loss": 0.5928, "step": 3202 }, { "epoch": 0.31172749391727494, "grad_norm": 1.2746939048596826, "learning_rate": 8.059725729880354e-06, "loss": 0.2945, "step": 3203 }, { "epoch": 0.3118248175182482, "grad_norm": 1.354856369246625, "learning_rate": 8.05847899635953e-06, "loss": 0.445, "step": 3204 }, { "epoch": 0.3119221411192214, "grad_norm": 1.3006695307047205, "learning_rate": 8.057231958915767e-06, "loss": 0.3558, "step": 3205 }, { "epoch": 0.31201946472019465, "grad_norm": 1.294215405183638, "learning_rate": 8.05598461767299e-06, "loss": 0.3764, "step": 3206 }, { "epoch": 0.3121167883211679, "grad_norm": 1.4713454344723826, "learning_rate": 8.054736972755138e-06, "loss": 0.4945, "step": 3207 }, { "epoch": 0.3122141119221411, "grad_norm": 1.1708979284493786, "learning_rate": 8.053489024286198e-06, "loss": 0.2419, "step": 3208 }, { "epoch": 0.31231143552311436, "grad_norm": 1.2933153684751388, "learning_rate": 8.052240772390176e-06, "loss": 0.4624, "step": 3209 }, { "epoch": 0.3124087591240876, "grad_norm": 1.2538957446837722, "learning_rate": 8.050992217191114e-06, "loss": 0.3305, "step": 3210 }, { "epoch": 0.3125060827250608, "grad_norm": 1.5362890980077515, "learning_rate": 8.049743358813078e-06, "loss": 0.5151, "step": 3211 }, { "epoch": 0.31260340632603406, "grad_norm": 1.3913400504692353, "learning_rate": 8.04849419738017e-06, "loss": 0.355, "step": 3212 }, { "epoch": 0.3127007299270073, "grad_norm": 1.3089021233990763, "learning_rate": 8.04724473301652e-06, "loss": 0.262, "step": 3213 }, { "epoch": 0.3127980535279805, "grad_norm": 1.0542807465796966, "learning_rate": 8.045994965846288e-06, "loss": 0.3133, "step": 3214 }, { "epoch": 0.31289537712895377, "grad_norm": 1.4749126602435412, "learning_rate": 8.044744895993666e-06, "loss": 0.46, "step": 3215 }, { "epoch": 0.312992700729927, "grad_norm": 1.3746833003058674, "learning_rate": 8.043494523582871e-06, "loss": 0.427, "step": 3216 }, { "epoch": 0.3130900243309002, "grad_norm": 1.3205796823520726, "learning_rate": 8.042243848738153e-06, "loss": 0.3354, "step": 3217 }, { "epoch": 0.31318734793187347, "grad_norm": 1.2865057356076828, "learning_rate": 8.040992871583797e-06, "loss": 0.3941, "step": 3218 }, { "epoch": 0.3132846715328467, "grad_norm": 1.1248365389432082, "learning_rate": 8.039741592244108e-06, "loss": 0.2628, "step": 3219 }, { "epoch": 0.3133819951338199, "grad_norm": 1.4652698761705358, "learning_rate": 8.03849001084343e-06, "loss": 0.3564, "step": 3220 }, { "epoch": 0.3134793187347932, "grad_norm": 1.3953599364279132, "learning_rate": 8.037238127506128e-06, "loss": 0.4163, "step": 3221 }, { "epoch": 0.31357664233576643, "grad_norm": 1.2419892638751415, "learning_rate": 8.035985942356612e-06, "loss": 0.354, "step": 3222 }, { "epoch": 0.3136739659367397, "grad_norm": 1.715104485156596, "learning_rate": 8.034733455519303e-06, "loss": 0.2963, "step": 3223 }, { "epoch": 0.3137712895377129, "grad_norm": 1.470040424076559, "learning_rate": 8.033480667118667e-06, "loss": 0.4648, "step": 3224 }, { "epoch": 0.31386861313868614, "grad_norm": 1.4565317560800817, "learning_rate": 8.032227577279191e-06, "loss": 0.512, "step": 3225 }, { "epoch": 0.3139659367396594, "grad_norm": 1.3742041452053566, "learning_rate": 8.030974186125397e-06, "loss": 0.3956, "step": 3226 }, { "epoch": 0.3140632603406326, "grad_norm": 1.3788492769137004, "learning_rate": 8.029720493781838e-06, "loss": 0.4509, "step": 3227 }, { "epoch": 0.31416058394160584, "grad_norm": 1.3858125546461868, "learning_rate": 8.028466500373089e-06, "loss": 0.2106, "step": 3228 }, { "epoch": 0.3142579075425791, "grad_norm": 1.3840076863400683, "learning_rate": 8.027212206023762e-06, "loss": 0.3038, "step": 3229 }, { "epoch": 0.3143552311435523, "grad_norm": 1.3152632009702614, "learning_rate": 8.0259576108585e-06, "loss": 0.4801, "step": 3230 }, { "epoch": 0.31445255474452555, "grad_norm": 1.3788629368363385, "learning_rate": 8.024702715001968e-06, "loss": 0.4245, "step": 3231 }, { "epoch": 0.3145498783454988, "grad_norm": 1.814337226771454, "learning_rate": 8.023447518578868e-06, "loss": 0.5632, "step": 3232 }, { "epoch": 0.314647201946472, "grad_norm": 1.4232062511008752, "learning_rate": 8.02219202171393e-06, "loss": 0.3286, "step": 3233 }, { "epoch": 0.31474452554744525, "grad_norm": 1.280903932308868, "learning_rate": 8.020936224531912e-06, "loss": 0.3626, "step": 3234 }, { "epoch": 0.3148418491484185, "grad_norm": 1.0510702763807533, "learning_rate": 8.019680127157607e-06, "loss": 0.2524, "step": 3235 }, { "epoch": 0.3149391727493917, "grad_norm": 1.7478480050109064, "learning_rate": 8.018423729715832e-06, "loss": 0.4348, "step": 3236 }, { "epoch": 0.31503649635036496, "grad_norm": 1.5359033994100462, "learning_rate": 8.017167032331434e-06, "loss": 0.4124, "step": 3237 }, { "epoch": 0.3151338199513382, "grad_norm": 1.506288459888311, "learning_rate": 8.015910035129294e-06, "loss": 0.3261, "step": 3238 }, { "epoch": 0.3152311435523114, "grad_norm": 1.2745124165082422, "learning_rate": 8.01465273823432e-06, "loss": 0.464, "step": 3239 }, { "epoch": 0.31532846715328466, "grad_norm": 1.338919807585357, "learning_rate": 8.01339514177145e-06, "loss": 0.4172, "step": 3240 }, { "epoch": 0.3154257907542579, "grad_norm": 1.486780887173232, "learning_rate": 8.012137245865654e-06, "loss": 0.5408, "step": 3241 }, { "epoch": 0.31552311435523117, "grad_norm": 1.3714950278620026, "learning_rate": 8.010879050641927e-06, "loss": 0.3436, "step": 3242 }, { "epoch": 0.31562043795620437, "grad_norm": 1.631965892281063, "learning_rate": 8.009620556225298e-06, "loss": 0.4727, "step": 3243 }, { "epoch": 0.3157177615571776, "grad_norm": 1.6149761911532763, "learning_rate": 8.008361762740825e-06, "loss": 0.4924, "step": 3244 }, { "epoch": 0.3158150851581509, "grad_norm": 1.3425533377851226, "learning_rate": 8.007102670313596e-06, "loss": 0.3844, "step": 3245 }, { "epoch": 0.3159124087591241, "grad_norm": 1.5650571116791179, "learning_rate": 8.005843279068724e-06, "loss": 0.5109, "step": 3246 }, { "epoch": 0.31600973236009733, "grad_norm": 1.469317468630812, "learning_rate": 8.004583589131359e-06, "loss": 0.3981, "step": 3247 }, { "epoch": 0.3161070559610706, "grad_norm": 1.3520956586482695, "learning_rate": 8.003323600626675e-06, "loss": 0.3628, "step": 3248 }, { "epoch": 0.3162043795620438, "grad_norm": 1.306843877245721, "learning_rate": 8.002063313679881e-06, "loss": 0.3738, "step": 3249 }, { "epoch": 0.31630170316301703, "grad_norm": 1.2062559137545288, "learning_rate": 8.000802728416209e-06, "loss": 0.3603, "step": 3250 }, { "epoch": 0.3163990267639903, "grad_norm": 1.2270301850514787, "learning_rate": 7.999541844960926e-06, "loss": 0.3444, "step": 3251 }, { "epoch": 0.3164963503649635, "grad_norm": 1.6301989651574331, "learning_rate": 7.998280663439325e-06, "loss": 0.5442, "step": 3252 }, { "epoch": 0.31659367396593674, "grad_norm": 1.3633091002846736, "learning_rate": 7.997019183976732e-06, "loss": 0.4596, "step": 3253 }, { "epoch": 0.31669099756691, "grad_norm": 1.0930243128300028, "learning_rate": 7.9957574066985e-06, "loss": 0.259, "step": 3254 }, { "epoch": 0.3167883211678832, "grad_norm": 1.3445912322829077, "learning_rate": 7.994495331730014e-06, "loss": 0.438, "step": 3255 }, { "epoch": 0.31688564476885644, "grad_norm": 1.3651374487790005, "learning_rate": 7.993232959196687e-06, "loss": 0.4589, "step": 3256 }, { "epoch": 0.3169829683698297, "grad_norm": 1.3424961538987858, "learning_rate": 7.99197028922396e-06, "loss": 0.4367, "step": 3257 }, { "epoch": 0.3170802919708029, "grad_norm": 1.5932344837064665, "learning_rate": 7.990707321937308e-06, "loss": 0.6921, "step": 3258 }, { "epoch": 0.31717761557177615, "grad_norm": 1.475521709829228, "learning_rate": 7.989444057462228e-06, "loss": 0.4759, "step": 3259 }, { "epoch": 0.3172749391727494, "grad_norm": 1.2971398875872913, "learning_rate": 7.988180495924256e-06, "loss": 0.4588, "step": 3260 }, { "epoch": 0.3173722627737226, "grad_norm": 1.4756687426354647, "learning_rate": 7.986916637448953e-06, "loss": 0.4776, "step": 3261 }, { "epoch": 0.31746958637469586, "grad_norm": 1.6271934377592354, "learning_rate": 7.985652482161907e-06, "loss": 0.4979, "step": 3262 }, { "epoch": 0.3175669099756691, "grad_norm": 1.6597644298654206, "learning_rate": 7.984388030188739e-06, "loss": 0.6091, "step": 3263 }, { "epoch": 0.31766423357664236, "grad_norm": 1.3383482658500123, "learning_rate": 7.983123281655097e-06, "loss": 0.4371, "step": 3264 }, { "epoch": 0.31776155717761556, "grad_norm": 1.4591756386356707, "learning_rate": 7.981858236686661e-06, "loss": 0.4888, "step": 3265 }, { "epoch": 0.3178588807785888, "grad_norm": 1.1677482677553854, "learning_rate": 7.98059289540914e-06, "loss": 0.348, "step": 3266 }, { "epoch": 0.31795620437956207, "grad_norm": 1.280054884121347, "learning_rate": 7.97932725794827e-06, "loss": 0.3909, "step": 3267 }, { "epoch": 0.31805352798053527, "grad_norm": 1.2818245878680554, "learning_rate": 7.97806132442982e-06, "loss": 0.3432, "step": 3268 }, { "epoch": 0.3181508515815085, "grad_norm": 1.2511980990717368, "learning_rate": 7.976795094979586e-06, "loss": 0.398, "step": 3269 }, { "epoch": 0.3182481751824818, "grad_norm": 1.1398243641659185, "learning_rate": 7.975528569723391e-06, "loss": 0.3561, "step": 3270 }, { "epoch": 0.31834549878345497, "grad_norm": 1.4375913010503336, "learning_rate": 7.974261748787096e-06, "loss": 0.4341, "step": 3271 }, { "epoch": 0.3184428223844282, "grad_norm": 1.5232808350435216, "learning_rate": 7.972994632296583e-06, "loss": 0.443, "step": 3272 }, { "epoch": 0.3185401459854015, "grad_norm": 1.1586214953526035, "learning_rate": 7.971727220377765e-06, "loss": 0.3709, "step": 3273 }, { "epoch": 0.3186374695863747, "grad_norm": 1.5274740880588324, "learning_rate": 7.970459513156587e-06, "loss": 0.3699, "step": 3274 }, { "epoch": 0.31873479318734793, "grad_norm": 1.3981699767571285, "learning_rate": 7.969191510759021e-06, "loss": 0.3678, "step": 3275 }, { "epoch": 0.3188321167883212, "grad_norm": 1.934723248663663, "learning_rate": 7.96792321331107e-06, "loss": 0.5528, "step": 3276 }, { "epoch": 0.3189294403892944, "grad_norm": 1.3063523868424662, "learning_rate": 7.966654620938765e-06, "loss": 0.381, "step": 3277 }, { "epoch": 0.31902676399026764, "grad_norm": 1.4316009890701873, "learning_rate": 7.965385733768166e-06, "loss": 0.3462, "step": 3278 }, { "epoch": 0.3191240875912409, "grad_norm": 1.3925079285209303, "learning_rate": 7.964116551925365e-06, "loss": 0.3468, "step": 3279 }, { "epoch": 0.3192214111922141, "grad_norm": 1.5929020888752208, "learning_rate": 7.96284707553648e-06, "loss": 0.5416, "step": 3280 }, { "epoch": 0.31931873479318734, "grad_norm": 1.3866900373898063, "learning_rate": 7.961577304727659e-06, "loss": 0.3982, "step": 3281 }, { "epoch": 0.3194160583941606, "grad_norm": 1.4962340605356037, "learning_rate": 7.960307239625082e-06, "loss": 0.4023, "step": 3282 }, { "epoch": 0.3195133819951338, "grad_norm": 1.4057559523045156, "learning_rate": 7.959036880354955e-06, "loss": 0.495, "step": 3283 }, { "epoch": 0.31961070559610705, "grad_norm": 1.2622065590243314, "learning_rate": 7.957766227043514e-06, "loss": 0.3581, "step": 3284 }, { "epoch": 0.3197080291970803, "grad_norm": 1.5090453488845967, "learning_rate": 7.956495279817026e-06, "loss": 0.455, "step": 3285 }, { "epoch": 0.31980535279805355, "grad_norm": 1.3133879337401893, "learning_rate": 7.955224038801785e-06, "loss": 0.4625, "step": 3286 }, { "epoch": 0.31990267639902675, "grad_norm": 2.521541296377749, "learning_rate": 7.953952504124114e-06, "loss": 0.4415, "step": 3287 }, { "epoch": 0.32, "grad_norm": 1.3567937262050411, "learning_rate": 7.952680675910365e-06, "loss": 0.3309, "step": 3288 }, { "epoch": 0.32009732360097326, "grad_norm": 1.5421944908903493, "learning_rate": 7.951408554286926e-06, "loss": 0.4589, "step": 3289 }, { "epoch": 0.32019464720194646, "grad_norm": 1.5998274173642424, "learning_rate": 7.950136139380204e-06, "loss": 0.5359, "step": 3290 }, { "epoch": 0.3202919708029197, "grad_norm": 1.2725707304657317, "learning_rate": 7.948863431316639e-06, "loss": 0.3625, "step": 3291 }, { "epoch": 0.32038929440389297, "grad_norm": 1.4290851095226622, "learning_rate": 7.947590430222702e-06, "loss": 0.4872, "step": 3292 }, { "epoch": 0.32048661800486616, "grad_norm": 1.3420498316619087, "learning_rate": 7.946317136224894e-06, "loss": 0.2389, "step": 3293 }, { "epoch": 0.3205839416058394, "grad_norm": 1.5002712163507215, "learning_rate": 7.94504354944974e-06, "loss": 0.5157, "step": 3294 }, { "epoch": 0.32068126520681267, "grad_norm": 1.660535414536618, "learning_rate": 7.9437696700238e-06, "loss": 0.4267, "step": 3295 }, { "epoch": 0.32077858880778587, "grad_norm": 1.2370297819791147, "learning_rate": 7.942495498073657e-06, "loss": 0.3355, "step": 3296 }, { "epoch": 0.3208759124087591, "grad_norm": 1.1415276287275913, "learning_rate": 7.941221033725928e-06, "loss": 0.2944, "step": 3297 }, { "epoch": 0.3209732360097324, "grad_norm": 1.5554788387015477, "learning_rate": 7.939946277107258e-06, "loss": 0.4871, "step": 3298 }, { "epoch": 0.3210705596107056, "grad_norm": 1.2985241068062738, "learning_rate": 7.938671228344319e-06, "loss": 0.3143, "step": 3299 }, { "epoch": 0.32116788321167883, "grad_norm": 1.301901578297674, "learning_rate": 7.937395887563812e-06, "loss": 0.3965, "step": 3300 }, { "epoch": 0.3212652068126521, "grad_norm": 0.9529902878913864, "learning_rate": 7.936120254892471e-06, "loss": 0.3083, "step": 3301 }, { "epoch": 0.3213625304136253, "grad_norm": 1.442015067028423, "learning_rate": 7.934844330457056e-06, "loss": 0.4318, "step": 3302 }, { "epoch": 0.32145985401459853, "grad_norm": 0.8997835110854661, "learning_rate": 7.933568114384358e-06, "loss": 0.2885, "step": 3303 }, { "epoch": 0.3215571776155718, "grad_norm": 1.5293826180265608, "learning_rate": 7.932291606801192e-06, "loss": 0.5437, "step": 3304 }, { "epoch": 0.321654501216545, "grad_norm": 1.2264156375116992, "learning_rate": 7.931014807834405e-06, "loss": 0.4001, "step": 3305 }, { "epoch": 0.32175182481751824, "grad_norm": 1.260350527748902, "learning_rate": 7.929737717610878e-06, "loss": 0.3847, "step": 3306 }, { "epoch": 0.3218491484184915, "grad_norm": 1.1346472253273232, "learning_rate": 7.92846033625751e-06, "loss": 0.3766, "step": 3307 }, { "epoch": 0.32194647201946475, "grad_norm": 1.1035142206503785, "learning_rate": 7.927182663901241e-06, "loss": 0.369, "step": 3308 }, { "epoch": 0.32204379562043794, "grad_norm": 1.2980599562576733, "learning_rate": 7.92590470066903e-06, "loss": 0.3982, "step": 3309 }, { "epoch": 0.3221411192214112, "grad_norm": 1.1742301163722888, "learning_rate": 7.924626446687871e-06, "loss": 0.3423, "step": 3310 }, { "epoch": 0.32223844282238445, "grad_norm": 1.8345279558348917, "learning_rate": 7.923347902084784e-06, "loss": 0.3145, "step": 3311 }, { "epoch": 0.32233576642335765, "grad_norm": 1.562283157560816, "learning_rate": 7.92206906698682e-06, "loss": 0.4308, "step": 3312 }, { "epoch": 0.3224330900243309, "grad_norm": 1.568673716358362, "learning_rate": 7.920789941521053e-06, "loss": 0.7025, "step": 3313 }, { "epoch": 0.32253041362530416, "grad_norm": 1.409203500738135, "learning_rate": 7.9195105258146e-06, "loss": 0.459, "step": 3314 }, { "epoch": 0.32262773722627736, "grad_norm": 0.880538645206563, "learning_rate": 7.918230819994589e-06, "loss": 0.2786, "step": 3315 }, { "epoch": 0.3227250608272506, "grad_norm": 1.1906838149715093, "learning_rate": 7.916950824188188e-06, "loss": 0.2686, "step": 3316 }, { "epoch": 0.32282238442822386, "grad_norm": 1.5111358859487132, "learning_rate": 7.91567053852259e-06, "loss": 0.5147, "step": 3317 }, { "epoch": 0.32291970802919706, "grad_norm": 1.1464259378074475, "learning_rate": 7.914389963125018e-06, "loss": 0.2685, "step": 3318 }, { "epoch": 0.3230170316301703, "grad_norm": 1.581506486679798, "learning_rate": 7.913109098122726e-06, "loss": 0.5854, "step": 3319 }, { "epoch": 0.32311435523114357, "grad_norm": 1.3242645255760028, "learning_rate": 7.91182794364299e-06, "loss": 0.2315, "step": 3320 }, { "epoch": 0.32321167883211677, "grad_norm": 1.6094300344001513, "learning_rate": 7.910546499813125e-06, "loss": 0.4739, "step": 3321 }, { "epoch": 0.32330900243309, "grad_norm": 1.3446882210600364, "learning_rate": 7.909264766760462e-06, "loss": 0.4145, "step": 3322 }, { "epoch": 0.3234063260340633, "grad_norm": 1.282785844235514, "learning_rate": 7.907982744612373e-06, "loss": 0.4324, "step": 3323 }, { "epoch": 0.32350364963503647, "grad_norm": 1.8199182173411141, "learning_rate": 7.90670043349625e-06, "loss": 0.2738, "step": 3324 }, { "epoch": 0.3236009732360097, "grad_norm": 1.3779170997525898, "learning_rate": 7.90541783353952e-06, "loss": 0.4386, "step": 3325 }, { "epoch": 0.323698296836983, "grad_norm": 1.4651475108226641, "learning_rate": 7.904134944869631e-06, "loss": 0.2272, "step": 3326 }, { "epoch": 0.3237956204379562, "grad_norm": 1.4444649350514327, "learning_rate": 7.902851767614069e-06, "loss": 0.3631, "step": 3327 }, { "epoch": 0.32389294403892943, "grad_norm": 1.6625856822026597, "learning_rate": 7.901568301900343e-06, "loss": 0.3649, "step": 3328 }, { "epoch": 0.3239902676399027, "grad_norm": 1.4506728288423798, "learning_rate": 7.900284547855992e-06, "loss": 0.3231, "step": 3329 }, { "epoch": 0.32408759124087594, "grad_norm": 1.5932450335999997, "learning_rate": 7.899000505608583e-06, "loss": 0.6145, "step": 3330 }, { "epoch": 0.32418491484184914, "grad_norm": 1.466657731000094, "learning_rate": 7.89771617528571e-06, "loss": 0.4498, "step": 3331 }, { "epoch": 0.3242822384428224, "grad_norm": 1.1444340687397683, "learning_rate": 7.896431557015001e-06, "loss": 0.3953, "step": 3332 }, { "epoch": 0.32437956204379564, "grad_norm": 1.2696327521021862, "learning_rate": 7.895146650924106e-06, "loss": 0.3974, "step": 3333 }, { "epoch": 0.32447688564476884, "grad_norm": 1.0672959190242737, "learning_rate": 7.893861457140711e-06, "loss": 0.3147, "step": 3334 }, { "epoch": 0.3245742092457421, "grad_norm": 1.5878275615936528, "learning_rate": 7.892575975792524e-06, "loss": 0.5637, "step": 3335 }, { "epoch": 0.32467153284671535, "grad_norm": 1.3553067860868797, "learning_rate": 7.891290207007284e-06, "loss": 0.3979, "step": 3336 }, { "epoch": 0.32476885644768855, "grad_norm": 1.3585205023281657, "learning_rate": 7.890004150912758e-06, "loss": 0.5408, "step": 3337 }, { "epoch": 0.3248661800486618, "grad_norm": 1.3335333713837063, "learning_rate": 7.888717807636745e-06, "loss": 0.5097, "step": 3338 }, { "epoch": 0.32496350364963505, "grad_norm": 1.6084982696846433, "learning_rate": 7.887431177307067e-06, "loss": 0.6652, "step": 3339 }, { "epoch": 0.32506082725060825, "grad_norm": 1.5254713518221517, "learning_rate": 7.886144260051577e-06, "loss": 0.5413, "step": 3340 }, { "epoch": 0.3251581508515815, "grad_norm": 1.2319691547427678, "learning_rate": 7.88485705599816e-06, "loss": 0.3669, "step": 3341 }, { "epoch": 0.32525547445255476, "grad_norm": 1.8060436355287317, "learning_rate": 7.883569565274722e-06, "loss": 0.332, "step": 3342 }, { "epoch": 0.32535279805352796, "grad_norm": 1.2724687132520958, "learning_rate": 7.882281788009207e-06, "loss": 0.4156, "step": 3343 }, { "epoch": 0.3254501216545012, "grad_norm": 1.2678653056689784, "learning_rate": 7.880993724329578e-06, "loss": 0.34, "step": 3344 }, { "epoch": 0.32554744525547447, "grad_norm": 1.6551598614012555, "learning_rate": 7.879705374363831e-06, "loss": 0.4642, "step": 3345 }, { "epoch": 0.32564476885644766, "grad_norm": 1.3206920191183078, "learning_rate": 7.878416738239991e-06, "loss": 0.3755, "step": 3346 }, { "epoch": 0.3257420924574209, "grad_norm": 1.3858157374277495, "learning_rate": 7.877127816086109e-06, "loss": 0.3394, "step": 3347 }, { "epoch": 0.32583941605839417, "grad_norm": 2.0774474222482286, "learning_rate": 7.87583860803027e-06, "loss": 0.4237, "step": 3348 }, { "epoch": 0.32593673965936737, "grad_norm": 2.373250216339497, "learning_rate": 7.87454911420058e-06, "loss": 0.4854, "step": 3349 }, { "epoch": 0.3260340632603406, "grad_norm": 1.2888779016735306, "learning_rate": 7.873259334725177e-06, "loss": 0.2953, "step": 3350 }, { "epoch": 0.3261313868613139, "grad_norm": 1.4655511800886896, "learning_rate": 7.87196926973223e-06, "loss": 0.5252, "step": 3351 }, { "epoch": 0.32622871046228713, "grad_norm": 1.3179348636560488, "learning_rate": 7.870678919349929e-06, "loss": 0.3587, "step": 3352 }, { "epoch": 0.32632603406326033, "grad_norm": 1.5442264887171864, "learning_rate": 7.869388283706501e-06, "loss": 0.3808, "step": 3353 }, { "epoch": 0.3264233576642336, "grad_norm": 0.909176934030135, "learning_rate": 7.868097362930194e-06, "loss": 0.1721, "step": 3354 }, { "epoch": 0.32652068126520684, "grad_norm": 1.5955557971117078, "learning_rate": 7.866806157149291e-06, "loss": 0.5127, "step": 3355 }, { "epoch": 0.32661800486618003, "grad_norm": 1.260336198155953, "learning_rate": 7.865514666492096e-06, "loss": 0.2699, "step": 3356 }, { "epoch": 0.3267153284671533, "grad_norm": 1.5004384671553455, "learning_rate": 7.864222891086948e-06, "loss": 0.3168, "step": 3357 }, { "epoch": 0.32681265206812654, "grad_norm": 1.261076205541224, "learning_rate": 7.862930831062211e-06, "loss": 0.3678, "step": 3358 }, { "epoch": 0.32690997566909974, "grad_norm": 1.637405579152474, "learning_rate": 7.861638486546279e-06, "loss": 0.4613, "step": 3359 }, { "epoch": 0.327007299270073, "grad_norm": 1.3584995739472485, "learning_rate": 7.860345857667571e-06, "loss": 0.3229, "step": 3360 }, { "epoch": 0.32710462287104625, "grad_norm": 1.5428951197572305, "learning_rate": 7.859052944554537e-06, "loss": 0.4904, "step": 3361 }, { "epoch": 0.32720194647201944, "grad_norm": 1.2674356859582525, "learning_rate": 7.857759747335652e-06, "loss": 0.2942, "step": 3362 }, { "epoch": 0.3272992700729927, "grad_norm": 1.5802075836374327, "learning_rate": 7.856466266139426e-06, "loss": 0.2949, "step": 3363 }, { "epoch": 0.32739659367396595, "grad_norm": 1.8209546966028776, "learning_rate": 7.855172501094394e-06, "loss": 0.5036, "step": 3364 }, { "epoch": 0.32749391727493915, "grad_norm": 1.5829487718400805, "learning_rate": 7.853878452329113e-06, "loss": 0.3638, "step": 3365 }, { "epoch": 0.3275912408759124, "grad_norm": 1.439883288183698, "learning_rate": 7.852584119972178e-06, "loss": 0.4529, "step": 3366 }, { "epoch": 0.32768856447688566, "grad_norm": 1.188441154560626, "learning_rate": 7.851289504152201e-06, "loss": 0.1984, "step": 3367 }, { "epoch": 0.32778588807785886, "grad_norm": 1.3841398947965158, "learning_rate": 7.84999460499784e-06, "loss": 0.3274, "step": 3368 }, { "epoch": 0.3278832116788321, "grad_norm": 1.3784180119913427, "learning_rate": 7.848699422637757e-06, "loss": 0.5186, "step": 3369 }, { "epoch": 0.32798053527980536, "grad_norm": 1.6167829071938324, "learning_rate": 7.847403957200667e-06, "loss": 0.5905, "step": 3370 }, { "epoch": 0.32807785888077856, "grad_norm": 1.5035664756701093, "learning_rate": 7.846108208815292e-06, "loss": 0.3502, "step": 3371 }, { "epoch": 0.3281751824817518, "grad_norm": 1.3836098651173667, "learning_rate": 7.844812177610398e-06, "loss": 0.426, "step": 3372 }, { "epoch": 0.32827250608272507, "grad_norm": 1.33208856990685, "learning_rate": 7.843515863714766e-06, "loss": 0.38, "step": 3373 }, { "epoch": 0.3283698296836983, "grad_norm": 1.3222208170433662, "learning_rate": 7.842219267257216e-06, "loss": 0.33, "step": 3374 }, { "epoch": 0.3284671532846715, "grad_norm": 1.26421550866771, "learning_rate": 7.84092238836659e-06, "loss": 0.3682, "step": 3375 }, { "epoch": 0.3285644768856448, "grad_norm": 1.3594451535417975, "learning_rate": 7.839625227171762e-06, "loss": 0.4504, "step": 3376 }, { "epoch": 0.328661800486618, "grad_norm": 1.2038996790979526, "learning_rate": 7.838327783801627e-06, "loss": 0.3675, "step": 3377 }, { "epoch": 0.3287591240875912, "grad_norm": 1.2523761100181583, "learning_rate": 7.837030058385117e-06, "loss": 0.2582, "step": 3378 }, { "epoch": 0.3288564476885645, "grad_norm": 1.38623949822592, "learning_rate": 7.835732051051188e-06, "loss": 0.426, "step": 3379 }, { "epoch": 0.32895377128953773, "grad_norm": 1.6752196039887233, "learning_rate": 7.834433761928819e-06, "loss": 0.5995, "step": 3380 }, { "epoch": 0.32905109489051093, "grad_norm": 1.4769893614554368, "learning_rate": 7.833135191147027e-06, "loss": 0.4434, "step": 3381 }, { "epoch": 0.3291484184914842, "grad_norm": 1.3473793961690093, "learning_rate": 7.831836338834851e-06, "loss": 0.4064, "step": 3382 }, { "epoch": 0.32924574209245744, "grad_norm": 1.4564612861566626, "learning_rate": 7.830537205121354e-06, "loss": 0.5275, "step": 3383 }, { "epoch": 0.32934306569343064, "grad_norm": 1.4662477809046923, "learning_rate": 7.829237790135638e-06, "loss": 0.3745, "step": 3384 }, { "epoch": 0.3294403892944039, "grad_norm": 1.3883338656353856, "learning_rate": 7.827938094006822e-06, "loss": 0.4361, "step": 3385 }, { "epoch": 0.32953771289537714, "grad_norm": 1.2360062745222065, "learning_rate": 7.826638116864061e-06, "loss": 0.2936, "step": 3386 }, { "epoch": 0.32963503649635034, "grad_norm": 1.2636393287865908, "learning_rate": 7.82533785883653e-06, "loss": 0.3816, "step": 3387 }, { "epoch": 0.3297323600973236, "grad_norm": 1.5704874728825693, "learning_rate": 7.824037320053442e-06, "loss": 0.4946, "step": 3388 }, { "epoch": 0.32982968369829685, "grad_norm": 1.5450366878165769, "learning_rate": 7.822736500644028e-06, "loss": 0.5973, "step": 3389 }, { "epoch": 0.32992700729927005, "grad_norm": 1.570140468606553, "learning_rate": 7.821435400737555e-06, "loss": 0.6187, "step": 3390 }, { "epoch": 0.3300243309002433, "grad_norm": 1.404973531589098, "learning_rate": 7.820134020463311e-06, "loss": 0.4404, "step": 3391 }, { "epoch": 0.33012165450121655, "grad_norm": 1.4221866811013593, "learning_rate": 7.818832359950615e-06, "loss": 0.4375, "step": 3392 }, { "epoch": 0.3302189781021898, "grad_norm": 1.3514761483715907, "learning_rate": 7.817530419328815e-06, "loss": 0.4633, "step": 3393 }, { "epoch": 0.330316301703163, "grad_norm": 1.4122938191319212, "learning_rate": 7.816228198727287e-06, "loss": 0.4735, "step": 3394 }, { "epoch": 0.33041362530413626, "grad_norm": 1.2807472917541904, "learning_rate": 7.814925698275432e-06, "loss": 0.2993, "step": 3395 }, { "epoch": 0.3305109489051095, "grad_norm": 1.2374164437493267, "learning_rate": 7.813622918102679e-06, "loss": 0.4486, "step": 3396 }, { "epoch": 0.3306082725060827, "grad_norm": 1.4923726710921128, "learning_rate": 7.812319858338486e-06, "loss": 0.3976, "step": 3397 }, { "epoch": 0.33070559610705597, "grad_norm": 1.4652422146853137, "learning_rate": 7.811016519112342e-06, "loss": 0.509, "step": 3398 }, { "epoch": 0.3308029197080292, "grad_norm": 1.3523346564010856, "learning_rate": 7.80971290055376e-06, "loss": 0.4045, "step": 3399 }, { "epoch": 0.3309002433090024, "grad_norm": 1.4034359644709637, "learning_rate": 7.808409002792277e-06, "loss": 0.5252, "step": 3400 }, { "epoch": 0.33099756690997567, "grad_norm": 1.5977042267924388, "learning_rate": 7.807104825957466e-06, "loss": 0.5708, "step": 3401 }, { "epoch": 0.3310948905109489, "grad_norm": 1.194169039851455, "learning_rate": 7.805800370178925e-06, "loss": 0.2592, "step": 3402 }, { "epoch": 0.3311922141119221, "grad_norm": 1.3572077520529662, "learning_rate": 7.804495635586274e-06, "loss": 0.3838, "step": 3403 }, { "epoch": 0.3312895377128954, "grad_norm": 1.6103699950857218, "learning_rate": 7.80319062230917e-06, "loss": 0.2847, "step": 3404 }, { "epoch": 0.33138686131386863, "grad_norm": 1.1427751941761943, "learning_rate": 7.80188533047729e-06, "loss": 0.3235, "step": 3405 }, { "epoch": 0.33148418491484183, "grad_norm": 1.4205910051862616, "learning_rate": 7.800579760220343e-06, "loss": 0.4415, "step": 3406 }, { "epoch": 0.3315815085158151, "grad_norm": 1.239833112068907, "learning_rate": 7.799273911668062e-06, "loss": 0.296, "step": 3407 }, { "epoch": 0.33167883211678834, "grad_norm": 1.382006652854662, "learning_rate": 7.797967784950215e-06, "loss": 0.5129, "step": 3408 }, { "epoch": 0.33177615571776153, "grad_norm": 1.3910482812533478, "learning_rate": 7.796661380196587e-06, "loss": 0.4355, "step": 3409 }, { "epoch": 0.3318734793187348, "grad_norm": 1.3166393673557537, "learning_rate": 7.795354697537e-06, "loss": 0.3357, "step": 3410 }, { "epoch": 0.33197080291970804, "grad_norm": 1.3540344564992455, "learning_rate": 7.794047737101298e-06, "loss": 0.2772, "step": 3411 }, { "epoch": 0.33206812652068124, "grad_norm": 1.5732997739305445, "learning_rate": 7.792740499019354e-06, "loss": 0.368, "step": 3412 }, { "epoch": 0.3321654501216545, "grad_norm": 1.00398349093736, "learning_rate": 7.791432983421071e-06, "loss": 0.2794, "step": 3413 }, { "epoch": 0.33226277372262775, "grad_norm": 1.5831231764140208, "learning_rate": 7.790125190436378e-06, "loss": 0.399, "step": 3414 }, { "epoch": 0.332360097323601, "grad_norm": 1.301291609070449, "learning_rate": 7.788817120195228e-06, "loss": 0.4975, "step": 3415 }, { "epoch": 0.3324574209245742, "grad_norm": 3.04667201221868, "learning_rate": 7.787508772827606e-06, "loss": 0.3034, "step": 3416 }, { "epoch": 0.33255474452554745, "grad_norm": 1.3711038355442808, "learning_rate": 7.786200148463525e-06, "loss": 0.4023, "step": 3417 }, { "epoch": 0.3326520681265207, "grad_norm": 1.4460757328108422, "learning_rate": 7.784891247233025e-06, "loss": 0.5218, "step": 3418 }, { "epoch": 0.3327493917274939, "grad_norm": 1.5174415949182483, "learning_rate": 7.783582069266167e-06, "loss": 0.5401, "step": 3419 }, { "epoch": 0.33284671532846716, "grad_norm": 1.170900270080405, "learning_rate": 7.78227261469305e-06, "loss": 0.3093, "step": 3420 }, { "epoch": 0.3329440389294404, "grad_norm": 1.1117784496155982, "learning_rate": 7.78096288364379e-06, "loss": 0.2159, "step": 3421 }, { "epoch": 0.3330413625304136, "grad_norm": 1.385907002729564, "learning_rate": 7.779652876248541e-06, "loss": 0.4513, "step": 3422 }, { "epoch": 0.33313868613138686, "grad_norm": 1.091845134289533, "learning_rate": 7.778342592637477e-06, "loss": 0.249, "step": 3423 }, { "epoch": 0.3332360097323601, "grad_norm": 1.1987125682853903, "learning_rate": 7.7770320329408e-06, "loss": 0.4583, "step": 3424 }, { "epoch": 0.3333333333333333, "grad_norm": 1.150260309114711, "learning_rate": 7.775721197288746e-06, "loss": 0.4145, "step": 3425 }, { "epoch": 0.33343065693430657, "grad_norm": 1.1244746146994131, "learning_rate": 7.77441008581157e-06, "loss": 0.2334, "step": 3426 }, { "epoch": 0.3335279805352798, "grad_norm": 2.372002969337908, "learning_rate": 7.773098698639558e-06, "loss": 0.3346, "step": 3427 }, { "epoch": 0.333625304136253, "grad_norm": 1.460305633169593, "learning_rate": 7.771787035903023e-06, "loss": 0.5202, "step": 3428 }, { "epoch": 0.3337226277372263, "grad_norm": 1.4552706392676258, "learning_rate": 7.77047509773231e-06, "loss": 0.3249, "step": 3429 }, { "epoch": 0.3338199513381995, "grad_norm": 26.423500051667432, "learning_rate": 7.769162884257778e-06, "loss": 0.3919, "step": 3430 }, { "epoch": 0.3339172749391727, "grad_norm": 1.3364724766772538, "learning_rate": 7.767850395609832e-06, "loss": 0.4882, "step": 3431 }, { "epoch": 0.334014598540146, "grad_norm": 1.162494089255124, "learning_rate": 7.766537631918888e-06, "loss": 0.4172, "step": 3432 }, { "epoch": 0.33411192214111923, "grad_norm": 1.4584273535075323, "learning_rate": 7.765224593315402e-06, "loss": 0.5721, "step": 3433 }, { "epoch": 0.33420924574209243, "grad_norm": 1.39936657097592, "learning_rate": 7.763911279929848e-06, "loss": 0.4454, "step": 3434 }, { "epoch": 0.3343065693430657, "grad_norm": 1.5774498430067907, "learning_rate": 7.76259769189273e-06, "loss": 0.6756, "step": 3435 }, { "epoch": 0.33440389294403894, "grad_norm": 1.4346477573335101, "learning_rate": 7.761283829334583e-06, "loss": 0.4939, "step": 3436 }, { "epoch": 0.3345012165450122, "grad_norm": 1.2329772568374064, "learning_rate": 7.759969692385963e-06, "loss": 0.3576, "step": 3437 }, { "epoch": 0.3345985401459854, "grad_norm": 86.06815068595351, "learning_rate": 7.75865528117746e-06, "loss": 0.7983, "step": 3438 }, { "epoch": 0.33469586374695864, "grad_norm": 1.4916748444459116, "learning_rate": 7.757340595839686e-06, "loss": 0.3408, "step": 3439 }, { "epoch": 0.3347931873479319, "grad_norm": 1.204864631425379, "learning_rate": 7.756025636503281e-06, "loss": 0.2893, "step": 3440 }, { "epoch": 0.3348905109489051, "grad_norm": 1.1483309949418294, "learning_rate": 7.754710403298915e-06, "loss": 0.307, "step": 3441 }, { "epoch": 0.33498783454987835, "grad_norm": 1.3801437746700074, "learning_rate": 7.753394896357283e-06, "loss": 0.5086, "step": 3442 }, { "epoch": 0.3350851581508516, "grad_norm": 1.4670528589774587, "learning_rate": 7.752079115809105e-06, "loss": 0.5494, "step": 3443 }, { "epoch": 0.3351824817518248, "grad_norm": 1.2268331435647832, "learning_rate": 7.750763061785139e-06, "loss": 0.3421, "step": 3444 }, { "epoch": 0.33527980535279805, "grad_norm": 1.117498287907938, "learning_rate": 7.749446734416153e-06, "loss": 0.3583, "step": 3445 }, { "epoch": 0.3353771289537713, "grad_norm": 1.6628933950216975, "learning_rate": 7.748130133832956e-06, "loss": 0.4265, "step": 3446 }, { "epoch": 0.3354744525547445, "grad_norm": 1.4371941282513903, "learning_rate": 7.746813260166379e-06, "loss": 0.5499, "step": 3447 }, { "epoch": 0.33557177615571776, "grad_norm": 1.4139636094726638, "learning_rate": 7.74549611354728e-06, "loss": 0.5113, "step": 3448 }, { "epoch": 0.335669099756691, "grad_norm": 1.3607040572953095, "learning_rate": 7.744178694106545e-06, "loss": 0.3662, "step": 3449 }, { "epoch": 0.3357664233576642, "grad_norm": 1.3497420382405303, "learning_rate": 7.742861001975086e-06, "loss": 0.37, "step": 3450 }, { "epoch": 0.33586374695863747, "grad_norm": 1.1583812763163044, "learning_rate": 7.741543037283844e-06, "loss": 0.2328, "step": 3451 }, { "epoch": 0.3359610705596107, "grad_norm": 1.233691835278808, "learning_rate": 7.740224800163783e-06, "loss": 0.3023, "step": 3452 }, { "epoch": 0.3360583941605839, "grad_norm": 1.770041486932794, "learning_rate": 7.738906290745902e-06, "loss": 0.4559, "step": 3453 }, { "epoch": 0.33615571776155717, "grad_norm": 1.6249038227669963, "learning_rate": 7.737587509161218e-06, "loss": 0.3305, "step": 3454 }, { "epoch": 0.3362530413625304, "grad_norm": 1.7123859840919058, "learning_rate": 7.73626845554078e-06, "loss": 0.8223, "step": 3455 }, { "epoch": 0.3363503649635036, "grad_norm": 1.465565821382168, "learning_rate": 7.734949130015665e-06, "loss": 0.3951, "step": 3456 }, { "epoch": 0.3364476885644769, "grad_norm": 1.3358312642650005, "learning_rate": 7.733629532716974e-06, "loss": 0.3988, "step": 3457 }, { "epoch": 0.33654501216545013, "grad_norm": 1.4614411980006665, "learning_rate": 7.732309663775834e-06, "loss": 0.4447, "step": 3458 }, { "epoch": 0.3366423357664234, "grad_norm": 1.5007446348141111, "learning_rate": 7.730989523323405e-06, "loss": 0.5075, "step": 3459 }, { "epoch": 0.3367396593673966, "grad_norm": 1.378374467272079, "learning_rate": 7.72966911149087e-06, "loss": 0.3713, "step": 3460 }, { "epoch": 0.33683698296836984, "grad_norm": 1.264554548276351, "learning_rate": 7.728348428409434e-06, "loss": 0.4239, "step": 3461 }, { "epoch": 0.3369343065693431, "grad_norm": 1.3654939025524866, "learning_rate": 7.72702747421034e-06, "loss": 0.3861, "step": 3462 }, { "epoch": 0.3370316301703163, "grad_norm": 1.456076628879786, "learning_rate": 7.72570624902485e-06, "loss": 0.4548, "step": 3463 }, { "epoch": 0.33712895377128954, "grad_norm": 1.5979907891076075, "learning_rate": 7.724384752984253e-06, "loss": 0.5395, "step": 3464 }, { "epoch": 0.3372262773722628, "grad_norm": 1.479690145713753, "learning_rate": 7.723062986219871e-06, "loss": 0.4676, "step": 3465 }, { "epoch": 0.337323600973236, "grad_norm": 1.5794398807050158, "learning_rate": 7.721740948863044e-06, "loss": 0.6383, "step": 3466 }, { "epoch": 0.33742092457420925, "grad_norm": 1.5077566027780562, "learning_rate": 7.720418641045147e-06, "loss": 0.449, "step": 3467 }, { "epoch": 0.3375182481751825, "grad_norm": 1.536687422230877, "learning_rate": 7.719096062897578e-06, "loss": 0.3885, "step": 3468 }, { "epoch": 0.3376155717761557, "grad_norm": 1.4136867972495795, "learning_rate": 7.717773214551762e-06, "loss": 0.4262, "step": 3469 }, { "epoch": 0.33771289537712895, "grad_norm": 1.7521028146405362, "learning_rate": 7.71645009613915e-06, "loss": 0.3553, "step": 3470 }, { "epoch": 0.3378102189781022, "grad_norm": 1.28472614917785, "learning_rate": 7.715126707791223e-06, "loss": 0.5044, "step": 3471 }, { "epoch": 0.3379075425790754, "grad_norm": 1.5285157685020219, "learning_rate": 7.713803049639485e-06, "loss": 0.3067, "step": 3472 }, { "epoch": 0.33800486618004866, "grad_norm": 1.301311213128161, "learning_rate": 7.712479121815473e-06, "loss": 0.4036, "step": 3473 }, { "epoch": 0.3381021897810219, "grad_norm": 1.4164505494518185, "learning_rate": 7.711154924450741e-06, "loss": 0.3674, "step": 3474 }, { "epoch": 0.3381995133819951, "grad_norm": 1.2252498333021546, "learning_rate": 7.709830457676876e-06, "loss": 0.273, "step": 3475 }, { "epoch": 0.33829683698296836, "grad_norm": 1.4131843411362779, "learning_rate": 7.708505721625497e-06, "loss": 0.565, "step": 3476 }, { "epoch": 0.3383941605839416, "grad_norm": 1.6947767404353455, "learning_rate": 7.707180716428237e-06, "loss": 0.4248, "step": 3477 }, { "epoch": 0.3384914841849148, "grad_norm": 1.2913551057356365, "learning_rate": 7.705855442216766e-06, "loss": 0.3537, "step": 3478 }, { "epoch": 0.33858880778588807, "grad_norm": 1.150600446597589, "learning_rate": 7.704529899122776e-06, "loss": 0.3311, "step": 3479 }, { "epoch": 0.3386861313868613, "grad_norm": 1.2021311829209522, "learning_rate": 7.703204087277989e-06, "loss": 0.4163, "step": 3480 }, { "epoch": 0.3387834549878346, "grad_norm": 1.256321657329123, "learning_rate": 7.70187800681415e-06, "loss": 0.3609, "step": 3481 }, { "epoch": 0.3388807785888078, "grad_norm": 1.3378074898611492, "learning_rate": 7.70055165786303e-06, "loss": 0.3365, "step": 3482 }, { "epoch": 0.338978102189781, "grad_norm": 1.170985634605712, "learning_rate": 7.699225040556435e-06, "loss": 0.2524, "step": 3483 }, { "epoch": 0.3390754257907543, "grad_norm": 1.3653491057947706, "learning_rate": 7.697898155026188e-06, "loss": 0.282, "step": 3484 }, { "epoch": 0.3391727493917275, "grad_norm": 1.3228454670807173, "learning_rate": 7.696571001404143e-06, "loss": 0.4489, "step": 3485 }, { "epoch": 0.33927007299270073, "grad_norm": 1.568456967406488, "learning_rate": 7.695243579822179e-06, "loss": 0.672, "step": 3486 }, { "epoch": 0.339367396593674, "grad_norm": 1.5380107558510523, "learning_rate": 7.693915890412205e-06, "loss": 0.4099, "step": 3487 }, { "epoch": 0.3394647201946472, "grad_norm": 1.522434941547624, "learning_rate": 7.692587933306152e-06, "loss": 0.3895, "step": 3488 }, { "epoch": 0.33956204379562044, "grad_norm": 1.6424350744273293, "learning_rate": 7.691259708635983e-06, "loss": 0.4547, "step": 3489 }, { "epoch": 0.3396593673965937, "grad_norm": 1.1285195925950828, "learning_rate": 7.689931216533682e-06, "loss": 0.3205, "step": 3490 }, { "epoch": 0.3397566909975669, "grad_norm": 1.2211335178765037, "learning_rate": 7.68860245713126e-06, "loss": 0.2849, "step": 3491 }, { "epoch": 0.33985401459854014, "grad_norm": 1.3440619705365895, "learning_rate": 7.687273430560763e-06, "loss": 0.3493, "step": 3492 }, { "epoch": 0.3399513381995134, "grad_norm": 1.9270777796759784, "learning_rate": 7.685944136954252e-06, "loss": 0.3207, "step": 3493 }, { "epoch": 0.3400486618004866, "grad_norm": 1.3294429746967642, "learning_rate": 7.684614576443821e-06, "loss": 0.3215, "step": 3494 }, { "epoch": 0.34014598540145985, "grad_norm": 1.299183824061712, "learning_rate": 7.68328474916159e-06, "loss": 0.3565, "step": 3495 }, { "epoch": 0.3402433090024331, "grad_norm": 1.5283573621457682, "learning_rate": 7.681954655239703e-06, "loss": 0.4789, "step": 3496 }, { "epoch": 0.3403406326034063, "grad_norm": 1.4764556038728422, "learning_rate": 7.680624294810335e-06, "loss": 0.4079, "step": 3497 }, { "epoch": 0.34043795620437955, "grad_norm": 1.5139159959394148, "learning_rate": 7.679293668005683e-06, "loss": 0.5341, "step": 3498 }, { "epoch": 0.3405352798053528, "grad_norm": 1.5537302722123498, "learning_rate": 7.677962774957971e-06, "loss": 0.478, "step": 3499 }, { "epoch": 0.340632603406326, "grad_norm": 1.5827415849591213, "learning_rate": 7.676631615799453e-06, "loss": 0.359, "step": 3500 }, { "epoch": 0.34072992700729926, "grad_norm": 1.8389434879657838, "learning_rate": 7.675300190662406e-06, "loss": 0.3688, "step": 3501 }, { "epoch": 0.3408272506082725, "grad_norm": 1.219949474382852, "learning_rate": 7.673968499679134e-06, "loss": 0.3099, "step": 3502 }, { "epoch": 0.34092457420924577, "grad_norm": 1.5256977236182008, "learning_rate": 7.67263654298197e-06, "loss": 0.3838, "step": 3503 }, { "epoch": 0.34102189781021897, "grad_norm": 1.4659409139833723, "learning_rate": 7.671304320703269e-06, "loss": 0.4845, "step": 3504 }, { "epoch": 0.3411192214111922, "grad_norm": 1.667843387368496, "learning_rate": 7.669971832975417e-06, "loss": 0.5876, "step": 3505 }, { "epoch": 0.3412165450121655, "grad_norm": 1.346414364244355, "learning_rate": 7.668639079930821e-06, "loss": 0.4337, "step": 3506 }, { "epoch": 0.34131386861313867, "grad_norm": 2.022273962642171, "learning_rate": 7.66730606170192e-06, "loss": 0.472, "step": 3507 }, { "epoch": 0.3414111922141119, "grad_norm": 1.4021904736753643, "learning_rate": 7.665972778421175e-06, "loss": 0.3331, "step": 3508 }, { "epoch": 0.3415085158150852, "grad_norm": 1.3964469851310124, "learning_rate": 7.664639230221081e-06, "loss": 0.4151, "step": 3509 }, { "epoch": 0.3416058394160584, "grad_norm": 1.437167087649688, "learning_rate": 7.663305417234146e-06, "loss": 0.3751, "step": 3510 }, { "epoch": 0.34170316301703163, "grad_norm": 1.3813486918408102, "learning_rate": 7.661971339592913e-06, "loss": 0.2492, "step": 3511 }, { "epoch": 0.3418004866180049, "grad_norm": 2.213050506553265, "learning_rate": 7.660636997429953e-06, "loss": 0.2442, "step": 3512 }, { "epoch": 0.3418978102189781, "grad_norm": 1.4125007730667565, "learning_rate": 7.659302390877858e-06, "loss": 0.3901, "step": 3513 }, { "epoch": 0.34199513381995134, "grad_norm": 1.5262547193449572, "learning_rate": 7.657967520069253e-06, "loss": 0.5142, "step": 3514 }, { "epoch": 0.3420924574209246, "grad_norm": 1.419026298338398, "learning_rate": 7.65663238513678e-06, "loss": 0.4604, "step": 3515 }, { "epoch": 0.3421897810218978, "grad_norm": 1.354197849012477, "learning_rate": 7.655296986213114e-06, "loss": 0.3741, "step": 3516 }, { "epoch": 0.34228710462287104, "grad_norm": 1.3675444218814583, "learning_rate": 7.653961323430954e-06, "loss": 0.4636, "step": 3517 }, { "epoch": 0.3423844282238443, "grad_norm": 1.6737094690203054, "learning_rate": 7.652625396923027e-06, "loss": 0.5368, "step": 3518 }, { "epoch": 0.3424817518248175, "grad_norm": 1.5346015529843957, "learning_rate": 7.651289206822084e-06, "loss": 0.5808, "step": 3519 }, { "epoch": 0.34257907542579075, "grad_norm": 1.7677803050375525, "learning_rate": 7.649952753260901e-06, "loss": 0.4331, "step": 3520 }, { "epoch": 0.342676399026764, "grad_norm": 1.5130877149006923, "learning_rate": 7.648616036372288e-06, "loss": 0.4343, "step": 3521 }, { "epoch": 0.34277372262773725, "grad_norm": 1.6505485143132894, "learning_rate": 7.647279056289068e-06, "loss": 0.4256, "step": 3522 }, { "epoch": 0.34287104622871045, "grad_norm": 1.1213144756308453, "learning_rate": 7.6459418131441e-06, "loss": 0.248, "step": 3523 }, { "epoch": 0.3429683698296837, "grad_norm": 1.4978011226878554, "learning_rate": 7.64460430707027e-06, "loss": 0.4457, "step": 3524 }, { "epoch": 0.34306569343065696, "grad_norm": 1.724927585574558, "learning_rate": 7.643266538200485e-06, "loss": 0.5746, "step": 3525 }, { "epoch": 0.34316301703163016, "grad_norm": 1.4218186587395187, "learning_rate": 7.641928506667677e-06, "loss": 0.4628, "step": 3526 }, { "epoch": 0.3432603406326034, "grad_norm": 1.1979163238967183, "learning_rate": 7.640590212604813e-06, "loss": 0.3276, "step": 3527 }, { "epoch": 0.34335766423357666, "grad_norm": 1.5459257353579257, "learning_rate": 7.639251656144873e-06, "loss": 0.5543, "step": 3528 }, { "epoch": 0.34345498783454986, "grad_norm": 2.168708523840157, "learning_rate": 7.637912837420876e-06, "loss": 0.4451, "step": 3529 }, { "epoch": 0.3435523114355231, "grad_norm": 1.3295572972455665, "learning_rate": 7.63657375656586e-06, "loss": 0.3659, "step": 3530 }, { "epoch": 0.34364963503649637, "grad_norm": 1.3940981237720802, "learning_rate": 7.635234413712886e-06, "loss": 0.3305, "step": 3531 }, { "epoch": 0.34374695863746957, "grad_norm": 1.668696796269181, "learning_rate": 7.63389480899505e-06, "loss": 0.262, "step": 3532 }, { "epoch": 0.3438442822384428, "grad_norm": 1.3235762846506705, "learning_rate": 7.632554942545468e-06, "loss": 0.354, "step": 3533 }, { "epoch": 0.3439416058394161, "grad_norm": 1.774595647074867, "learning_rate": 7.631214814497283e-06, "loss": 0.4181, "step": 3534 }, { "epoch": 0.3440389294403893, "grad_norm": 1.3699960689070612, "learning_rate": 7.629874424983664e-06, "loss": 0.4893, "step": 3535 }, { "epoch": 0.3441362530413625, "grad_norm": 1.0054517790481798, "learning_rate": 7.628533774137809e-06, "loss": 0.2678, "step": 3536 }, { "epoch": 0.3442335766423358, "grad_norm": 1.2585804176689974, "learning_rate": 7.627192862092936e-06, "loss": 0.3145, "step": 3537 }, { "epoch": 0.344330900243309, "grad_norm": 1.4486458769434574, "learning_rate": 7.625851688982293e-06, "loss": 0.5018, "step": 3538 }, { "epoch": 0.34442822384428223, "grad_norm": 1.6761010493950546, "learning_rate": 7.624510254939155e-06, "loss": 0.5786, "step": 3539 }, { "epoch": 0.3445255474452555, "grad_norm": 1.1326870552848924, "learning_rate": 7.623168560096819e-06, "loss": 0.2714, "step": 3540 }, { "epoch": 0.3446228710462287, "grad_norm": 1.380514271735255, "learning_rate": 7.62182660458861e-06, "loss": 0.4435, "step": 3541 }, { "epoch": 0.34472019464720194, "grad_norm": 1.3847938610044406, "learning_rate": 7.620484388547881e-06, "loss": 0.456, "step": 3542 }, { "epoch": 0.3448175182481752, "grad_norm": 1.4774572497281164, "learning_rate": 7.619141912108008e-06, "loss": 0.4016, "step": 3543 }, { "epoch": 0.34491484184914845, "grad_norm": 1.4155087320104913, "learning_rate": 7.617799175402392e-06, "loss": 0.4672, "step": 3544 }, { "epoch": 0.34501216545012164, "grad_norm": 1.2347049030843364, "learning_rate": 7.616456178564463e-06, "loss": 0.4701, "step": 3545 }, { "epoch": 0.3451094890510949, "grad_norm": 1.3617965352115597, "learning_rate": 7.615112921727677e-06, "loss": 0.4411, "step": 3546 }, { "epoch": 0.34520681265206815, "grad_norm": 1.5080403368283026, "learning_rate": 7.613769405025511e-06, "loss": 0.446, "step": 3547 }, { "epoch": 0.34530413625304135, "grad_norm": 1.3587460871748047, "learning_rate": 7.612425628591473e-06, "loss": 0.4618, "step": 3548 }, { "epoch": 0.3454014598540146, "grad_norm": 1.5594894397765757, "learning_rate": 7.611081592559095e-06, "loss": 0.6454, "step": 3549 }, { "epoch": 0.34549878345498786, "grad_norm": 1.5472776871393785, "learning_rate": 7.609737297061934e-06, "loss": 0.4209, "step": 3550 }, { "epoch": 0.34559610705596105, "grad_norm": 1.1387925779252865, "learning_rate": 7.608392742233573e-06, "loss": 0.2542, "step": 3551 }, { "epoch": 0.3456934306569343, "grad_norm": 2.1308610533262753, "learning_rate": 7.6070479282076226e-06, "loss": 0.4232, "step": 3552 }, { "epoch": 0.34579075425790756, "grad_norm": 1.655772230226882, "learning_rate": 7.605702855117717e-06, "loss": 0.316, "step": 3553 }, { "epoch": 0.34588807785888076, "grad_norm": 1.2773133253854652, "learning_rate": 7.604357523097518e-06, "loss": 0.3933, "step": 3554 }, { "epoch": 0.345985401459854, "grad_norm": 1.5157476945746606, "learning_rate": 7.6030119322807105e-06, "loss": 0.4895, "step": 3555 }, { "epoch": 0.34608272506082727, "grad_norm": 1.5384836415390328, "learning_rate": 7.601666082801007e-06, "loss": 0.4571, "step": 3556 }, { "epoch": 0.34618004866180047, "grad_norm": 1.3494696317056387, "learning_rate": 7.600319974792145e-06, "loss": 0.3585, "step": 3557 }, { "epoch": 0.3462773722627737, "grad_norm": 1.5639692646479821, "learning_rate": 7.59897360838789e-06, "loss": 0.4913, "step": 3558 }, { "epoch": 0.346374695863747, "grad_norm": 1.5771201683321747, "learning_rate": 7.59762698372203e-06, "loss": 0.644, "step": 3559 }, { "epoch": 0.34647201946472017, "grad_norm": 1.445755844165086, "learning_rate": 7.596280100928379e-06, "loss": 0.4662, "step": 3560 }, { "epoch": 0.3465693430656934, "grad_norm": 1.0169653044814775, "learning_rate": 7.59493296014078e-06, "loss": 0.1873, "step": 3561 }, { "epoch": 0.3466666666666667, "grad_norm": 1.3573861316393436, "learning_rate": 7.593585561493098e-06, "loss": 0.3621, "step": 3562 }, { "epoch": 0.3467639902676399, "grad_norm": 1.3679634725925294, "learning_rate": 7.592237905119224e-06, "loss": 0.3714, "step": 3563 }, { "epoch": 0.34686131386861313, "grad_norm": 1.6153262572349345, "learning_rate": 7.590889991153076e-06, "loss": 0.1934, "step": 3564 }, { "epoch": 0.3469586374695864, "grad_norm": 1.3501706757209104, "learning_rate": 7.589541819728597e-06, "loss": 0.3771, "step": 3565 }, { "epoch": 0.34705596107055964, "grad_norm": 1.4359834383485004, "learning_rate": 7.588193390979756e-06, "loss": 0.4021, "step": 3566 }, { "epoch": 0.34715328467153284, "grad_norm": 1.6178918309073458, "learning_rate": 7.5868447050405456e-06, "loss": 0.5326, "step": 3567 }, { "epoch": 0.3472506082725061, "grad_norm": 1.2161304982080974, "learning_rate": 7.585495762044989e-06, "loss": 0.3215, "step": 3568 }, { "epoch": 0.34734793187347934, "grad_norm": 1.2514780842366786, "learning_rate": 7.584146562127128e-06, "loss": 0.2619, "step": 3569 }, { "epoch": 0.34744525547445254, "grad_norm": 1.292347877079837, "learning_rate": 7.5827971054210334e-06, "loss": 0.3722, "step": 3570 }, { "epoch": 0.3475425790754258, "grad_norm": 1.428500991174703, "learning_rate": 7.581447392060806e-06, "loss": 0.4681, "step": 3571 }, { "epoch": 0.34763990267639905, "grad_norm": 1.6187169310973553, "learning_rate": 7.5800974221805635e-06, "loss": 0.3123, "step": 3572 }, { "epoch": 0.34773722627737225, "grad_norm": 1.2828962403261588, "learning_rate": 7.5787471959144535e-06, "loss": 0.3426, "step": 3573 }, { "epoch": 0.3478345498783455, "grad_norm": 1.3424794600068044, "learning_rate": 7.577396713396649e-06, "loss": 0.2749, "step": 3574 }, { "epoch": 0.34793187347931875, "grad_norm": 1.311429995391481, "learning_rate": 7.576045974761352e-06, "loss": 0.1912, "step": 3575 }, { "epoch": 0.34802919708029195, "grad_norm": 1.9475934853598964, "learning_rate": 7.57469498014278e-06, "loss": 0.3568, "step": 3576 }, { "epoch": 0.3481265206812652, "grad_norm": 1.5712590946732445, "learning_rate": 7.573343729675187e-06, "loss": 0.4361, "step": 3577 }, { "epoch": 0.34822384428223846, "grad_norm": 1.3460143771601136, "learning_rate": 7.5719922234928435e-06, "loss": 0.3258, "step": 3578 }, { "epoch": 0.34832116788321166, "grad_norm": 1.3457032135851295, "learning_rate": 7.5706404617300544e-06, "loss": 0.2679, "step": 3579 }, { "epoch": 0.3484184914841849, "grad_norm": 1.602637400100207, "learning_rate": 7.569288444521141e-06, "loss": 0.4349, "step": 3580 }, { "epoch": 0.34851581508515816, "grad_norm": 1.193898879748489, "learning_rate": 7.567936172000456e-06, "loss": 0.3823, "step": 3581 }, { "epoch": 0.34861313868613136, "grad_norm": 1.7213934943419156, "learning_rate": 7.5665836443023764e-06, "loss": 0.4477, "step": 3582 }, { "epoch": 0.3487104622871046, "grad_norm": 2.0896889317289262, "learning_rate": 7.5652308615613025e-06, "loss": 0.548, "step": 3583 }, { "epoch": 0.34880778588807787, "grad_norm": 1.2133584709672542, "learning_rate": 7.563877823911661e-06, "loss": 0.3342, "step": 3584 }, { "epoch": 0.34890510948905107, "grad_norm": 1.3518803834565474, "learning_rate": 7.562524531487902e-06, "loss": 0.4428, "step": 3585 }, { "epoch": 0.3490024330900243, "grad_norm": 1.5646028482749865, "learning_rate": 7.561170984424509e-06, "loss": 0.4805, "step": 3586 }, { "epoch": 0.3490997566909976, "grad_norm": 1.725109749219042, "learning_rate": 7.5598171828559775e-06, "loss": 0.3953, "step": 3587 }, { "epoch": 0.34919708029197083, "grad_norm": 1.6751385503374157, "learning_rate": 7.558463126916842e-06, "loss": 0.3466, "step": 3588 }, { "epoch": 0.349294403892944, "grad_norm": 1.577501457080682, "learning_rate": 7.557108816741651e-06, "loss": 0.387, "step": 3589 }, { "epoch": 0.3493917274939173, "grad_norm": 1.5319961753926956, "learning_rate": 7.5557542524649866e-06, "loss": 0.2916, "step": 3590 }, { "epoch": 0.34948905109489053, "grad_norm": 2.5209551905243845, "learning_rate": 7.554399434221449e-06, "loss": 0.2941, "step": 3591 }, { "epoch": 0.34958637469586373, "grad_norm": 1.1980216546975295, "learning_rate": 7.553044362145672e-06, "loss": 0.3867, "step": 3592 }, { "epoch": 0.349683698296837, "grad_norm": 1.425370917650529, "learning_rate": 7.551689036372306e-06, "loss": 0.4788, "step": 3593 }, { "epoch": 0.34978102189781024, "grad_norm": 1.5504040775883203, "learning_rate": 7.550333457036032e-06, "loss": 0.5355, "step": 3594 }, { "epoch": 0.34987834549878344, "grad_norm": 1.253372887044746, "learning_rate": 7.5489776242715564e-06, "loss": 0.4783, "step": 3595 }, { "epoch": 0.3499756690997567, "grad_norm": 1.4985868632126829, "learning_rate": 7.547621538213607e-06, "loss": 0.4225, "step": 3596 }, { "epoch": 0.35007299270072995, "grad_norm": 1.4435970539146459, "learning_rate": 7.5462651989969385e-06, "loss": 0.3745, "step": 3597 }, { "epoch": 0.35017031630170314, "grad_norm": 1.3981266208958172, "learning_rate": 7.5449086067563314e-06, "loss": 0.3456, "step": 3598 }, { "epoch": 0.3502676399026764, "grad_norm": 1.8315759023798575, "learning_rate": 7.543551761626594e-06, "loss": 0.5542, "step": 3599 }, { "epoch": 0.35036496350364965, "grad_norm": 1.0610077420382762, "learning_rate": 7.542194663742553e-06, "loss": 0.3009, "step": 3600 }, { "epoch": 0.35046228710462285, "grad_norm": 1.4599599576710758, "learning_rate": 7.5408373132390674e-06, "loss": 0.3322, "step": 3601 }, { "epoch": 0.3505596107055961, "grad_norm": 1.357290752856556, "learning_rate": 7.539479710251014e-06, "loss": 0.4219, "step": 3602 }, { "epoch": 0.35065693430656936, "grad_norm": 1.3466674715612543, "learning_rate": 7.538121854913303e-06, "loss": 0.4688, "step": 3603 }, { "epoch": 0.35075425790754255, "grad_norm": 1.9207221814789595, "learning_rate": 7.536763747360863e-06, "loss": 0.5269, "step": 3604 }, { "epoch": 0.3508515815085158, "grad_norm": 1.4616085693980927, "learning_rate": 7.535405387728649e-06, "loss": 0.5216, "step": 3605 }, { "epoch": 0.35094890510948906, "grad_norm": 1.638466859707607, "learning_rate": 7.534046776151645e-06, "loss": 0.4155, "step": 3606 }, { "epoch": 0.35104622871046226, "grad_norm": 1.8610570015640786, "learning_rate": 7.532687912764853e-06, "loss": 0.4385, "step": 3607 }, { "epoch": 0.3511435523114355, "grad_norm": 1.390595082164788, "learning_rate": 7.531328797703308e-06, "loss": 0.4791, "step": 3608 }, { "epoch": 0.35124087591240877, "grad_norm": 1.4107542592778146, "learning_rate": 7.529969431102063e-06, "loss": 0.5517, "step": 3609 }, { "epoch": 0.351338199513382, "grad_norm": 1.2873358057005926, "learning_rate": 7.528609813096203e-06, "loss": 0.2964, "step": 3610 }, { "epoch": 0.3514355231143552, "grad_norm": 1.585587787123838, "learning_rate": 7.527249943820831e-06, "loss": 0.5375, "step": 3611 }, { "epoch": 0.3515328467153285, "grad_norm": 1.307663133994867, "learning_rate": 7.525889823411076e-06, "loss": 0.2655, "step": 3612 }, { "epoch": 0.3516301703163017, "grad_norm": 1.3238605926395333, "learning_rate": 7.524529452002099e-06, "loss": 0.3678, "step": 3613 }, { "epoch": 0.3517274939172749, "grad_norm": 1.1603226975769314, "learning_rate": 7.523168829729078e-06, "loss": 0.294, "step": 3614 }, { "epoch": 0.3518248175182482, "grad_norm": 0.9756816706305802, "learning_rate": 7.52180795672722e-06, "loss": 0.1748, "step": 3615 }, { "epoch": 0.35192214111922143, "grad_norm": 1.6381757346588681, "learning_rate": 7.520446833131756e-06, "loss": 0.3671, "step": 3616 }, { "epoch": 0.35201946472019463, "grad_norm": 1.6012410655431273, "learning_rate": 7.51908545907794e-06, "loss": 0.7845, "step": 3617 }, { "epoch": 0.3521167883211679, "grad_norm": 1.513216444842459, "learning_rate": 7.517723834701054e-06, "loss": 0.5675, "step": 3618 }, { "epoch": 0.35221411192214114, "grad_norm": 1.5876840830126697, "learning_rate": 7.516361960136403e-06, "loss": 0.4117, "step": 3619 }, { "epoch": 0.35231143552311434, "grad_norm": 1.2568334714306615, "learning_rate": 7.514999835519318e-06, "loss": 0.3623, "step": 3620 }, { "epoch": 0.3524087591240876, "grad_norm": 1.430201120305146, "learning_rate": 7.513637460985153e-06, "loss": 0.4618, "step": 3621 }, { "epoch": 0.35250608272506084, "grad_norm": 1.6954496126059004, "learning_rate": 7.512274836669288e-06, "loss": 0.4018, "step": 3622 }, { "epoch": 0.35260340632603404, "grad_norm": 1.3717482871101743, "learning_rate": 7.510911962707128e-06, "loss": 0.5364, "step": 3623 }, { "epoch": 0.3527007299270073, "grad_norm": 1.3632873709904672, "learning_rate": 7.509548839234102e-06, "loss": 0.3786, "step": 3624 }, { "epoch": 0.35279805352798055, "grad_norm": 1.5766225782578598, "learning_rate": 7.508185466385667e-06, "loss": 0.6176, "step": 3625 }, { "epoch": 0.35289537712895375, "grad_norm": 1.6300483194455977, "learning_rate": 7.506821844297301e-06, "loss": 0.4239, "step": 3626 }, { "epoch": 0.352992700729927, "grad_norm": 1.244455438425305, "learning_rate": 7.505457973104506e-06, "loss": 0.3627, "step": 3627 }, { "epoch": 0.35309002433090025, "grad_norm": 1.441382927434537, "learning_rate": 7.504093852942815e-06, "loss": 0.3853, "step": 3628 }, { "epoch": 0.35318734793187345, "grad_norm": 1.3700542367515711, "learning_rate": 7.502729483947776e-06, "loss": 0.5312, "step": 3629 }, { "epoch": 0.3532846715328467, "grad_norm": 1.3101033627157612, "learning_rate": 7.50136486625497e-06, "loss": 0.3997, "step": 3630 }, { "epoch": 0.35338199513381996, "grad_norm": 1.4700150418971674, "learning_rate": 7.500000000000001e-06, "loss": 0.4536, "step": 3631 }, { "epoch": 0.3534793187347932, "grad_norm": 1.4399856969334963, "learning_rate": 7.4986348853184944e-06, "loss": 0.4301, "step": 3632 }, { "epoch": 0.3535766423357664, "grad_norm": 1.5002693093456838, "learning_rate": 7.497269522346105e-06, "loss": 0.5339, "step": 3633 }, { "epoch": 0.35367396593673966, "grad_norm": 1.4332148495576913, "learning_rate": 7.4959039112185065e-06, "loss": 0.4218, "step": 3634 }, { "epoch": 0.3537712895377129, "grad_norm": 1.430533403803271, "learning_rate": 7.494538052071403e-06, "loss": 0.4658, "step": 3635 }, { "epoch": 0.3538686131386861, "grad_norm": 1.7972458166545215, "learning_rate": 7.4931719450405185e-06, "loss": 0.2642, "step": 3636 }, { "epoch": 0.35396593673965937, "grad_norm": 1.138544720049106, "learning_rate": 7.491805590261607e-06, "loss": 0.3429, "step": 3637 }, { "epoch": 0.3540632603406326, "grad_norm": 1.9786831181417208, "learning_rate": 7.490438987870443e-06, "loss": 0.4378, "step": 3638 }, { "epoch": 0.3541605839416058, "grad_norm": 1.136048581491554, "learning_rate": 7.489072138002825e-06, "loss": 0.2668, "step": 3639 }, { "epoch": 0.3542579075425791, "grad_norm": 1.3441611259868125, "learning_rate": 7.4877050407945796e-06, "loss": 0.468, "step": 3640 }, { "epoch": 0.35435523114355233, "grad_norm": 1.1560682205410908, "learning_rate": 7.486337696381554e-06, "loss": 0.3363, "step": 3641 }, { "epoch": 0.3544525547445255, "grad_norm": 1.0393622403442615, "learning_rate": 7.484970104899624e-06, "loss": 0.2803, "step": 3642 }, { "epoch": 0.3545498783454988, "grad_norm": 1.514160943546232, "learning_rate": 7.483602266484686e-06, "loss": 0.4441, "step": 3643 }, { "epoch": 0.35464720194647203, "grad_norm": 1.580897624441714, "learning_rate": 7.482234181272666e-06, "loss": 0.3502, "step": 3644 }, { "epoch": 0.35474452554744523, "grad_norm": 1.2716040020289627, "learning_rate": 7.480865849399508e-06, "loss": 0.3551, "step": 3645 }, { "epoch": 0.3548418491484185, "grad_norm": 1.6397132883430414, "learning_rate": 7.4794972710011885e-06, "loss": 0.54, "step": 3646 }, { "epoch": 0.35493917274939174, "grad_norm": 1.4383012777464312, "learning_rate": 7.478128446213699e-06, "loss": 0.3954, "step": 3647 }, { "epoch": 0.35503649635036494, "grad_norm": 1.4020794377643642, "learning_rate": 7.476759375173063e-06, "loss": 0.2869, "step": 3648 }, { "epoch": 0.3551338199513382, "grad_norm": 1.420695895411819, "learning_rate": 7.475390058015326e-06, "loss": 0.4162, "step": 3649 }, { "epoch": 0.35523114355231145, "grad_norm": 1.3856407596171976, "learning_rate": 7.474020494876556e-06, "loss": 0.403, "step": 3650 }, { "epoch": 0.35532846715328464, "grad_norm": 1.6355333792797258, "learning_rate": 7.472650685892851e-06, "loss": 0.6147, "step": 3651 }, { "epoch": 0.3554257907542579, "grad_norm": 1.3284128213805295, "learning_rate": 7.471280631200325e-06, "loss": 0.2128, "step": 3652 }, { "epoch": 0.35552311435523115, "grad_norm": 1.357840581851552, "learning_rate": 7.469910330935126e-06, "loss": 0.3483, "step": 3653 }, { "epoch": 0.3556204379562044, "grad_norm": 1.2770810104938104, "learning_rate": 7.468539785233417e-06, "loss": 0.2812, "step": 3654 }, { "epoch": 0.3557177615571776, "grad_norm": 1.7995385373189825, "learning_rate": 7.467168994231394e-06, "loss": 0.2944, "step": 3655 }, { "epoch": 0.35581508515815086, "grad_norm": 2.0088307214551846, "learning_rate": 7.465797958065272e-06, "loss": 0.3204, "step": 3656 }, { "epoch": 0.3559124087591241, "grad_norm": 1.3573976363618303, "learning_rate": 7.46442667687129e-06, "loss": 0.3666, "step": 3657 }, { "epoch": 0.3560097323600973, "grad_norm": 1.4772002442457595, "learning_rate": 7.463055150785715e-06, "loss": 0.3756, "step": 3658 }, { "epoch": 0.35610705596107056, "grad_norm": 1.7169887882459811, "learning_rate": 7.461683379944835e-06, "loss": 0.6085, "step": 3659 }, { "epoch": 0.3562043795620438, "grad_norm": 1.753449718301559, "learning_rate": 7.460311364484964e-06, "loss": 0.691, "step": 3660 }, { "epoch": 0.356301703163017, "grad_norm": 1.9142906208727815, "learning_rate": 7.458939104542442e-06, "loss": 0.5569, "step": 3661 }, { "epoch": 0.35639902676399027, "grad_norm": 1.564692980111203, "learning_rate": 7.457566600253631e-06, "loss": 0.417, "step": 3662 }, { "epoch": 0.3564963503649635, "grad_norm": 1.4241868392709103, "learning_rate": 7.4561938517549136e-06, "loss": 0.3702, "step": 3663 }, { "epoch": 0.3565936739659367, "grad_norm": 1.0774182753758312, "learning_rate": 7.4548208591827056e-06, "loss": 0.325, "step": 3664 }, { "epoch": 0.35669099756691, "grad_norm": 1.3486569434899873, "learning_rate": 7.4534476226734384e-06, "loss": 0.3906, "step": 3665 }, { "epoch": 0.3567883211678832, "grad_norm": 1.2303476425395732, "learning_rate": 7.452074142363573e-06, "loss": 0.3073, "step": 3666 }, { "epoch": 0.3568856447688564, "grad_norm": 1.40711240815019, "learning_rate": 7.450700418389594e-06, "loss": 0.386, "step": 3667 }, { "epoch": 0.3569829683698297, "grad_norm": 1.4572695621598242, "learning_rate": 7.449326450888007e-06, "loss": 0.5228, "step": 3668 }, { "epoch": 0.35708029197080293, "grad_norm": 1.4210722617044638, "learning_rate": 7.4479522399953465e-06, "loss": 0.4409, "step": 3669 }, { "epoch": 0.35717761557177613, "grad_norm": 1.508447701068191, "learning_rate": 7.446577785848166e-06, "loss": 0.4571, "step": 3670 }, { "epoch": 0.3572749391727494, "grad_norm": 1.3197481653540741, "learning_rate": 7.445203088583047e-06, "loss": 0.3886, "step": 3671 }, { "epoch": 0.35737226277372264, "grad_norm": 1.7619697747209029, "learning_rate": 7.443828148336594e-06, "loss": 0.5652, "step": 3672 }, { "epoch": 0.3574695863746959, "grad_norm": 1.2741586714286437, "learning_rate": 7.442452965245437e-06, "loss": 0.2068, "step": 3673 }, { "epoch": 0.3575669099756691, "grad_norm": 1.6171437133168665, "learning_rate": 7.4410775394462285e-06, "loss": 0.4785, "step": 3674 }, { "epoch": 0.35766423357664234, "grad_norm": 1.3575662879144605, "learning_rate": 7.4397018710756415e-06, "loss": 0.3851, "step": 3675 }, { "epoch": 0.3577615571776156, "grad_norm": 1.5452516302727513, "learning_rate": 7.438325960270382e-06, "loss": 0.4154, "step": 3676 }, { "epoch": 0.3578588807785888, "grad_norm": 1.4057841059024094, "learning_rate": 7.436949807167172e-06, "loss": 0.4309, "step": 3677 }, { "epoch": 0.35795620437956205, "grad_norm": 1.3402694159440718, "learning_rate": 7.435573411902763e-06, "loss": 0.3905, "step": 3678 }, { "epoch": 0.3580535279805353, "grad_norm": 1.899972993257223, "learning_rate": 7.434196774613926e-06, "loss": 0.324, "step": 3679 }, { "epoch": 0.3581508515815085, "grad_norm": 1.517104694845036, "learning_rate": 7.432819895437461e-06, "loss": 0.5038, "step": 3680 }, { "epoch": 0.35824817518248175, "grad_norm": 1.5026505417391594, "learning_rate": 7.431442774510186e-06, "loss": 0.5613, "step": 3681 }, { "epoch": 0.358345498783455, "grad_norm": 1.6998698072507739, "learning_rate": 7.4300654119689475e-06, "loss": 0.4362, "step": 3682 }, { "epoch": 0.3584428223844282, "grad_norm": 1.5499969728501817, "learning_rate": 7.4286878079506175e-06, "loss": 0.5288, "step": 3683 }, { "epoch": 0.35854014598540146, "grad_norm": 1.7237862062144214, "learning_rate": 7.4273099625920866e-06, "loss": 0.2981, "step": 3684 }, { "epoch": 0.3586374695863747, "grad_norm": 1.5399796130329546, "learning_rate": 7.4259318760302725e-06, "loss": 0.3486, "step": 3685 }, { "epoch": 0.3587347931873479, "grad_norm": 1.734733819358604, "learning_rate": 7.424553548402116e-06, "loss": 0.4681, "step": 3686 }, { "epoch": 0.35883211678832116, "grad_norm": 1.4858189837407711, "learning_rate": 7.423174979844583e-06, "loss": 0.4624, "step": 3687 }, { "epoch": 0.3589294403892944, "grad_norm": 2.0317094723346556, "learning_rate": 7.421796170494664e-06, "loss": 0.421, "step": 3688 }, { "epoch": 0.3590267639902676, "grad_norm": 1.6561811900690029, "learning_rate": 7.42041712048937e-06, "loss": 0.4593, "step": 3689 }, { "epoch": 0.35912408759124087, "grad_norm": 1.3298486632098465, "learning_rate": 7.41903782996574e-06, "loss": 0.3328, "step": 3690 }, { "epoch": 0.3592214111922141, "grad_norm": 1.6636689486346872, "learning_rate": 7.417658299060834e-06, "loss": 0.4999, "step": 3691 }, { "epoch": 0.3593187347931873, "grad_norm": 1.566657143760038, "learning_rate": 7.4162785279117354e-06, "loss": 0.6945, "step": 3692 }, { "epoch": 0.3594160583941606, "grad_norm": 1.1976745803136968, "learning_rate": 7.414898516655555e-06, "loss": 0.3368, "step": 3693 }, { "epoch": 0.35951338199513383, "grad_norm": 1.5599198973704254, "learning_rate": 7.413518265429427e-06, "loss": 0.3875, "step": 3694 }, { "epoch": 0.3596107055961071, "grad_norm": 1.45764719852823, "learning_rate": 7.412137774370502e-06, "loss": 0.4665, "step": 3695 }, { "epoch": 0.3597080291970803, "grad_norm": 1.2276949665906944, "learning_rate": 7.410757043615966e-06, "loss": 0.285, "step": 3696 }, { "epoch": 0.35980535279805353, "grad_norm": 1.5359072801599356, "learning_rate": 7.40937607330302e-06, "loss": 0.376, "step": 3697 }, { "epoch": 0.3599026763990268, "grad_norm": 1.209800432472448, "learning_rate": 7.4079948635688925e-06, "loss": 0.3298, "step": 3698 }, { "epoch": 0.36, "grad_norm": 1.4892209107876908, "learning_rate": 7.406613414550835e-06, "loss": 0.3474, "step": 3699 }, { "epoch": 0.36009732360097324, "grad_norm": 1.4539822746623663, "learning_rate": 7.405231726386124e-06, "loss": 0.3756, "step": 3700 }, { "epoch": 0.3601946472019465, "grad_norm": 1.4980607212426436, "learning_rate": 7.403849799212057e-06, "loss": 0.4841, "step": 3701 }, { "epoch": 0.3602919708029197, "grad_norm": 1.3698188527628243, "learning_rate": 7.40246763316596e-06, "loss": 0.3881, "step": 3702 }, { "epoch": 0.36038929440389295, "grad_norm": 1.3253201655674622, "learning_rate": 7.401085228385177e-06, "loss": 0.3933, "step": 3703 }, { "epoch": 0.3604866180048662, "grad_norm": 1.4396239636102728, "learning_rate": 7.399702585007077e-06, "loss": 0.408, "step": 3704 }, { "epoch": 0.3605839416058394, "grad_norm": 1.5453004626353954, "learning_rate": 7.398319703169058e-06, "loss": 0.5276, "step": 3705 }, { "epoch": 0.36068126520681265, "grad_norm": 1.403106821872738, "learning_rate": 7.396936583008535e-06, "loss": 0.4362, "step": 3706 }, { "epoch": 0.3607785888077859, "grad_norm": 1.6766339602355043, "learning_rate": 7.395553224662952e-06, "loss": 0.5511, "step": 3707 }, { "epoch": 0.3608759124087591, "grad_norm": 1.6187272412677542, "learning_rate": 7.394169628269771e-06, "loss": 0.455, "step": 3708 }, { "epoch": 0.36097323600973236, "grad_norm": 1.2227561836718068, "learning_rate": 7.392785793966483e-06, "loss": 0.3885, "step": 3709 }, { "epoch": 0.3610705596107056, "grad_norm": 1.3724156281155415, "learning_rate": 7.391401721890599e-06, "loss": 0.2664, "step": 3710 }, { "epoch": 0.3611678832116788, "grad_norm": 1.2230528654367878, "learning_rate": 7.390017412179658e-06, "loss": 0.3302, "step": 3711 }, { "epoch": 0.36126520681265206, "grad_norm": 1.1928082331280525, "learning_rate": 7.388632864971217e-06, "loss": 0.2629, "step": 3712 }, { "epoch": 0.3613625304136253, "grad_norm": 1.6037454753999223, "learning_rate": 7.38724808040286e-06, "loss": 0.3905, "step": 3713 }, { "epoch": 0.3614598540145985, "grad_norm": 1.3621648463732532, "learning_rate": 7.3858630586121926e-06, "loss": 0.403, "step": 3714 }, { "epoch": 0.36155717761557177, "grad_norm": 1.829925954185779, "learning_rate": 7.384477799736848e-06, "loss": 0.5973, "step": 3715 }, { "epoch": 0.361654501216545, "grad_norm": 1.4268953334113146, "learning_rate": 7.383092303914479e-06, "loss": 0.4476, "step": 3716 }, { "epoch": 0.3617518248175183, "grad_norm": 1.4218010516377573, "learning_rate": 7.381706571282762e-06, "loss": 0.3333, "step": 3717 }, { "epoch": 0.3618491484184915, "grad_norm": 1.4149794161212645, "learning_rate": 7.3803206019794004e-06, "loss": 0.4466, "step": 3718 }, { "epoch": 0.3619464720194647, "grad_norm": 1.3528540289292441, "learning_rate": 7.378934396142116e-06, "loss": 0.4781, "step": 3719 }, { "epoch": 0.362043795620438, "grad_norm": 1.7444608763847531, "learning_rate": 7.3775479539086595e-06, "loss": 0.3911, "step": 3720 }, { "epoch": 0.3621411192214112, "grad_norm": 1.3328673124502177, "learning_rate": 7.376161275416802e-06, "loss": 0.4312, "step": 3721 }, { "epoch": 0.36223844282238443, "grad_norm": 1.5999365764206797, "learning_rate": 7.374774360804337e-06, "loss": 0.4827, "step": 3722 }, { "epoch": 0.3623357664233577, "grad_norm": 1.3911965727851072, "learning_rate": 7.3733872102090846e-06, "loss": 0.3357, "step": 3723 }, { "epoch": 0.3624330900243309, "grad_norm": 2.108972016446877, "learning_rate": 7.371999823768885e-06, "loss": 0.534, "step": 3724 }, { "epoch": 0.36253041362530414, "grad_norm": 1.3184107339533155, "learning_rate": 7.370612201621606e-06, "loss": 0.3319, "step": 3725 }, { "epoch": 0.3626277372262774, "grad_norm": 1.5480195252175315, "learning_rate": 7.369224343905135e-06, "loss": 0.4569, "step": 3726 }, { "epoch": 0.3627250608272506, "grad_norm": 1.5137208598424667, "learning_rate": 7.3678362507573855e-06, "loss": 0.4966, "step": 3727 }, { "epoch": 0.36282238442822384, "grad_norm": 1.9044576254266983, "learning_rate": 7.366447922316292e-06, "loss": 0.3348, "step": 3728 }, { "epoch": 0.3629197080291971, "grad_norm": 1.5245853892303196, "learning_rate": 7.365059358719814e-06, "loss": 0.4253, "step": 3729 }, { "epoch": 0.3630170316301703, "grad_norm": 1.122262909962616, "learning_rate": 7.3636705601059344e-06, "loss": 0.3222, "step": 3730 }, { "epoch": 0.36311435523114355, "grad_norm": 1.8386225119764608, "learning_rate": 7.362281526612657e-06, "loss": 0.2037, "step": 3731 }, { "epoch": 0.3632116788321168, "grad_norm": 1.4527104104207418, "learning_rate": 7.360892258378014e-06, "loss": 0.4542, "step": 3732 }, { "epoch": 0.36330900243309, "grad_norm": 1.2351545455698876, "learning_rate": 7.359502755540054e-06, "loss": 0.3215, "step": 3733 }, { "epoch": 0.36340632603406325, "grad_norm": 1.2618923481187536, "learning_rate": 7.358113018236856e-06, "loss": 0.3527, "step": 3734 }, { "epoch": 0.3635036496350365, "grad_norm": 1.659524308148738, "learning_rate": 7.356723046606519e-06, "loss": 0.4318, "step": 3735 }, { "epoch": 0.3636009732360097, "grad_norm": 1.565399683654683, "learning_rate": 7.355332840787164e-06, "loss": 0.5299, "step": 3736 }, { "epoch": 0.36369829683698296, "grad_norm": 1.6178264737718506, "learning_rate": 7.353942400916936e-06, "loss": 0.397, "step": 3737 }, { "epoch": 0.3637956204379562, "grad_norm": 1.6534303379570627, "learning_rate": 7.352551727134005e-06, "loss": 0.6081, "step": 3738 }, { "epoch": 0.36389294403892947, "grad_norm": 1.247872353830009, "learning_rate": 7.351160819576564e-06, "loss": 0.3425, "step": 3739 }, { "epoch": 0.36399026763990266, "grad_norm": 1.5020180478592695, "learning_rate": 7.349769678382826e-06, "loss": 0.5016, "step": 3740 }, { "epoch": 0.3640875912408759, "grad_norm": 1.5279869032792845, "learning_rate": 7.34837830369103e-06, "loss": 0.3526, "step": 3741 }, { "epoch": 0.36418491484184917, "grad_norm": 1.3094265238017966, "learning_rate": 7.346986695639439e-06, "loss": 0.3748, "step": 3742 }, { "epoch": 0.36428223844282237, "grad_norm": 1.4537907377719694, "learning_rate": 7.34559485436634e-06, "loss": 0.1896, "step": 3743 }, { "epoch": 0.3643795620437956, "grad_norm": 1.329137618047409, "learning_rate": 7.344202780010036e-06, "loss": 0.3121, "step": 3744 }, { "epoch": 0.3644768856447689, "grad_norm": 1.6436280150893918, "learning_rate": 7.342810472708861e-06, "loss": 0.4822, "step": 3745 }, { "epoch": 0.3645742092457421, "grad_norm": 1.3725794380420981, "learning_rate": 7.341417932601169e-06, "loss": 0.3409, "step": 3746 }, { "epoch": 0.36467153284671533, "grad_norm": 1.5052359115379705, "learning_rate": 7.34002515982534e-06, "loss": 0.2377, "step": 3747 }, { "epoch": 0.3647688564476886, "grad_norm": 1.3552921410971608, "learning_rate": 7.3386321545197715e-06, "loss": 0.3572, "step": 3748 }, { "epoch": 0.3648661800486618, "grad_norm": 1.5018874056046176, "learning_rate": 7.337238916822888e-06, "loss": 0.4986, "step": 3749 }, { "epoch": 0.36496350364963503, "grad_norm": 1.8358104905369528, "learning_rate": 7.335845446873137e-06, "loss": 0.3914, "step": 3750 }, { "epoch": 0.3650608272506083, "grad_norm": 1.1168351051108918, "learning_rate": 7.334451744808988e-06, "loss": 0.2733, "step": 3751 }, { "epoch": 0.3651581508515815, "grad_norm": 1.3776079933771972, "learning_rate": 7.333057810768934e-06, "loss": 0.3787, "step": 3752 }, { "epoch": 0.36525547445255474, "grad_norm": 1.341701987297211, "learning_rate": 7.331663644891492e-06, "loss": 0.3567, "step": 3753 }, { "epoch": 0.365352798053528, "grad_norm": 1.5110248875717456, "learning_rate": 7.3302692473152e-06, "loss": 0.345, "step": 3754 }, { "epoch": 0.3654501216545012, "grad_norm": 1.3479707654958055, "learning_rate": 7.328874618178621e-06, "loss": 0.3874, "step": 3755 }, { "epoch": 0.36554744525547445, "grad_norm": 1.4702256277367285, "learning_rate": 7.32747975762034e-06, "loss": 0.2701, "step": 3756 }, { "epoch": 0.3656447688564477, "grad_norm": 1.2848329456259648, "learning_rate": 7.326084665778965e-06, "loss": 0.428, "step": 3757 }, { "epoch": 0.3657420924574209, "grad_norm": 1.3129833784540297, "learning_rate": 7.324689342793125e-06, "loss": 0.3708, "step": 3758 }, { "epoch": 0.36583941605839415, "grad_norm": 1.585613649874231, "learning_rate": 7.323293788801478e-06, "loss": 0.5306, "step": 3759 }, { "epoch": 0.3659367396593674, "grad_norm": 1.829241550219066, "learning_rate": 7.3218980039427e-06, "loss": 0.4737, "step": 3760 }, { "epoch": 0.36603406326034066, "grad_norm": 1.395899680918888, "learning_rate": 7.320501988355488e-06, "loss": 0.4636, "step": 3761 }, { "epoch": 0.36613138686131386, "grad_norm": 1.5046483178350247, "learning_rate": 7.319105742178568e-06, "loss": 0.4155, "step": 3762 }, { "epoch": 0.3662287104622871, "grad_norm": 1.280400622675231, "learning_rate": 7.317709265550685e-06, "loss": 0.3885, "step": 3763 }, { "epoch": 0.36632603406326036, "grad_norm": 1.217266157593417, "learning_rate": 7.316312558610608e-06, "loss": 0.3962, "step": 3764 }, { "epoch": 0.36642335766423356, "grad_norm": 1.263633008120406, "learning_rate": 7.314915621497129e-06, "loss": 0.478, "step": 3765 }, { "epoch": 0.3665206812652068, "grad_norm": 1.3563025380870863, "learning_rate": 7.31351845434906e-06, "loss": 0.4117, "step": 3766 }, { "epoch": 0.36661800486618007, "grad_norm": 1.2737220025097429, "learning_rate": 7.312121057305241e-06, "loss": 0.424, "step": 3767 }, { "epoch": 0.36671532846715327, "grad_norm": 1.291092078272358, "learning_rate": 7.3107234305045324e-06, "loss": 0.334, "step": 3768 }, { "epoch": 0.3668126520681265, "grad_norm": 1.3126844548992553, "learning_rate": 7.309325574085815e-06, "loss": 0.4322, "step": 3769 }, { "epoch": 0.3669099756690998, "grad_norm": 1.1245662064795194, "learning_rate": 7.307927488187997e-06, "loss": 0.2236, "step": 3770 }, { "epoch": 0.367007299270073, "grad_norm": 1.5102285684556136, "learning_rate": 7.306529172950006e-06, "loss": 0.5623, "step": 3771 }, { "epoch": 0.3671046228710462, "grad_norm": 1.685763702060817, "learning_rate": 7.305130628510792e-06, "loss": 0.6844, "step": 3772 }, { "epoch": 0.3672019464720195, "grad_norm": 1.2974033428491907, "learning_rate": 7.30373185500933e-06, "loss": 0.423, "step": 3773 }, { "epoch": 0.3672992700729927, "grad_norm": 1.5850183990084508, "learning_rate": 7.302332852584619e-06, "loss": 0.5261, "step": 3774 }, { "epoch": 0.36739659367396593, "grad_norm": 1.2629993659748047, "learning_rate": 7.3009336213756775e-06, "loss": 0.4166, "step": 3775 }, { "epoch": 0.3674939172749392, "grad_norm": 1.5305199731858852, "learning_rate": 7.299534161521548e-06, "loss": 0.3868, "step": 3776 }, { "epoch": 0.3675912408759124, "grad_norm": 1.3498481506141689, "learning_rate": 7.298134473161293e-06, "loss": 0.4383, "step": 3777 }, { "epoch": 0.36768856447688564, "grad_norm": 1.1775129878791764, "learning_rate": 7.296734556434006e-06, "loss": 0.3227, "step": 3778 }, { "epoch": 0.3677858880778589, "grad_norm": 1.2659702536536892, "learning_rate": 7.295334411478793e-06, "loss": 0.3261, "step": 3779 }, { "epoch": 0.3678832116788321, "grad_norm": 1.29047857012686, "learning_rate": 7.293934038434789e-06, "loss": 0.3675, "step": 3780 }, { "epoch": 0.36798053527980534, "grad_norm": 1.4852568566742426, "learning_rate": 7.292533437441149e-06, "loss": 0.5919, "step": 3781 }, { "epoch": 0.3680778588807786, "grad_norm": 1.6696818575227366, "learning_rate": 7.291132608637053e-06, "loss": 0.574, "step": 3782 }, { "epoch": 0.36817518248175185, "grad_norm": 1.1373482236107322, "learning_rate": 7.289731552161701e-06, "loss": 0.2637, "step": 3783 }, { "epoch": 0.36827250608272505, "grad_norm": 1.3365823182266015, "learning_rate": 7.288330268154318e-06, "loss": 0.3472, "step": 3784 }, { "epoch": 0.3683698296836983, "grad_norm": 1.372948416466753, "learning_rate": 7.286928756754148e-06, "loss": 0.328, "step": 3785 }, { "epoch": 0.36846715328467156, "grad_norm": 1.4321229130137978, "learning_rate": 7.285527018100464e-06, "loss": 0.4256, "step": 3786 }, { "epoch": 0.36856447688564475, "grad_norm": 1.5800422881297964, "learning_rate": 7.284125052332554e-06, "loss": 0.6671, "step": 3787 }, { "epoch": 0.368661800486618, "grad_norm": 1.2921074248553535, "learning_rate": 7.282722859589734e-06, "loss": 0.2557, "step": 3788 }, { "epoch": 0.36875912408759126, "grad_norm": 1.5092560968103714, "learning_rate": 7.281320440011339e-06, "loss": 0.483, "step": 3789 }, { "epoch": 0.36885644768856446, "grad_norm": 1.6123103806541292, "learning_rate": 7.279917793736732e-06, "loss": 0.6551, "step": 3790 }, { "epoch": 0.3689537712895377, "grad_norm": 1.4808246114399204, "learning_rate": 7.278514920905291e-06, "loss": 0.2513, "step": 3791 }, { "epoch": 0.36905109489051097, "grad_norm": 1.5663493234083814, "learning_rate": 7.277111821656423e-06, "loss": 0.571, "step": 3792 }, { "epoch": 0.36914841849148416, "grad_norm": 1.363238577188513, "learning_rate": 7.275708496129552e-06, "loss": 0.4348, "step": 3793 }, { "epoch": 0.3692457420924574, "grad_norm": 1.4565243900455658, "learning_rate": 7.27430494446413e-06, "loss": 0.4819, "step": 3794 }, { "epoch": 0.36934306569343067, "grad_norm": 1.384454947890668, "learning_rate": 7.272901166799628e-06, "loss": 0.3765, "step": 3795 }, { "epoch": 0.36944038929440387, "grad_norm": 1.4124479402383887, "learning_rate": 7.27149716327554e-06, "loss": 0.4979, "step": 3796 }, { "epoch": 0.3695377128953771, "grad_norm": 1.5746058137286074, "learning_rate": 7.270092934031383e-06, "loss": 0.6907, "step": 3797 }, { "epoch": 0.3696350364963504, "grad_norm": 1.149224148134044, "learning_rate": 7.268688479206694e-06, "loss": 0.2767, "step": 3798 }, { "epoch": 0.3697323600973236, "grad_norm": 1.3675830377895282, "learning_rate": 7.267283798941038e-06, "loss": 0.4495, "step": 3799 }, { "epoch": 0.36982968369829683, "grad_norm": 1.4280450276827679, "learning_rate": 7.265878893373996e-06, "loss": 0.3899, "step": 3800 }, { "epoch": 0.3699270072992701, "grad_norm": 1.49782627276864, "learning_rate": 7.264473762645178e-06, "loss": 0.4774, "step": 3801 }, { "epoch": 0.37002433090024334, "grad_norm": 1.3952118310784345, "learning_rate": 7.263068406894209e-06, "loss": 0.5262, "step": 3802 }, { "epoch": 0.37012165450121653, "grad_norm": 1.05962510209006, "learning_rate": 7.261662826260741e-06, "loss": 0.285, "step": 3803 }, { "epoch": 0.3702189781021898, "grad_norm": 1.1322437831904242, "learning_rate": 7.260257020884448e-06, "loss": 0.3328, "step": 3804 }, { "epoch": 0.37031630170316304, "grad_norm": 1.578419195268038, "learning_rate": 7.2588509909050254e-06, "loss": 0.6624, "step": 3805 }, { "epoch": 0.37041362530413624, "grad_norm": 1.6753162370187171, "learning_rate": 7.257444736462193e-06, "loss": 0.6607, "step": 3806 }, { "epoch": 0.3705109489051095, "grad_norm": 1.2155863954352035, "learning_rate": 7.2560382576956875e-06, "loss": 0.385, "step": 3807 }, { "epoch": 0.37060827250608275, "grad_norm": 1.3086656237549659, "learning_rate": 7.254631554745275e-06, "loss": 0.3251, "step": 3808 }, { "epoch": 0.37070559610705595, "grad_norm": 1.6932808088932305, "learning_rate": 7.253224627750738e-06, "loss": 0.5078, "step": 3809 }, { "epoch": 0.3708029197080292, "grad_norm": 1.140327832951829, "learning_rate": 7.251817476851886e-06, "loss": 0.3278, "step": 3810 }, { "epoch": 0.37090024330900245, "grad_norm": 1.2897084785014141, "learning_rate": 7.2504101021885475e-06, "loss": 0.3223, "step": 3811 }, { "epoch": 0.37099756690997565, "grad_norm": 1.4467933285693058, "learning_rate": 7.249002503900573e-06, "loss": 0.3934, "step": 3812 }, { "epoch": 0.3710948905109489, "grad_norm": 1.208314429606629, "learning_rate": 7.2475946821278374e-06, "loss": 0.4511, "step": 3813 }, { "epoch": 0.37119221411192216, "grad_norm": 1.3518646431718864, "learning_rate": 7.2461866370102354e-06, "loss": 0.4939, "step": 3814 }, { "epoch": 0.37128953771289536, "grad_norm": 1.360244692009117, "learning_rate": 7.244778368687688e-06, "loss": 0.3937, "step": 3815 }, { "epoch": 0.3713868613138686, "grad_norm": 1.3528197069652437, "learning_rate": 7.243369877300135e-06, "loss": 0.3492, "step": 3816 }, { "epoch": 0.37148418491484186, "grad_norm": 1.2751734783858382, "learning_rate": 7.2419611629875386e-06, "loss": 0.4052, "step": 3817 }, { "epoch": 0.37158150851581506, "grad_norm": 1.2580260101504126, "learning_rate": 7.240552225889882e-06, "loss": 0.3386, "step": 3818 }, { "epoch": 0.3716788321167883, "grad_norm": 1.3674139216806946, "learning_rate": 7.239143066147174e-06, "loss": 0.2891, "step": 3819 }, { "epoch": 0.37177615571776157, "grad_norm": 1.0530572795884554, "learning_rate": 7.237733683899444e-06, "loss": 0.2657, "step": 3820 }, { "epoch": 0.37187347931873477, "grad_norm": 2.205291656616187, "learning_rate": 7.236324079286742e-06, "loss": 0.2303, "step": 3821 }, { "epoch": 0.371970802919708, "grad_norm": 1.1572222874558429, "learning_rate": 7.234914252449141e-06, "loss": 0.2307, "step": 3822 }, { "epoch": 0.3720681265206813, "grad_norm": 1.3550158356870847, "learning_rate": 7.233504203526738e-06, "loss": 0.4026, "step": 3823 }, { "epoch": 0.37216545012165453, "grad_norm": 1.1916885791889118, "learning_rate": 7.232093932659648e-06, "loss": 0.384, "step": 3824 }, { "epoch": 0.3722627737226277, "grad_norm": 1.3181641384661131, "learning_rate": 7.230683439988013e-06, "loss": 0.3978, "step": 3825 }, { "epoch": 0.372360097323601, "grad_norm": 1.4449003714564748, "learning_rate": 7.229272725651995e-06, "loss": 0.4663, "step": 3826 }, { "epoch": 0.37245742092457423, "grad_norm": 1.110719215634453, "learning_rate": 7.2278617897917734e-06, "loss": 0.3073, "step": 3827 }, { "epoch": 0.37255474452554743, "grad_norm": 1.3075317628405025, "learning_rate": 7.226450632547558e-06, "loss": 0.341, "step": 3828 }, { "epoch": 0.3726520681265207, "grad_norm": 1.490372677606144, "learning_rate": 7.225039254059574e-06, "loss": 0.483, "step": 3829 }, { "epoch": 0.37274939172749394, "grad_norm": 1.3137386471795909, "learning_rate": 7.223627654468072e-06, "loss": 0.3739, "step": 3830 }, { "epoch": 0.37284671532846714, "grad_norm": 1.242608245033651, "learning_rate": 7.2222158339133245e-06, "loss": 0.2848, "step": 3831 }, { "epoch": 0.3729440389294404, "grad_norm": 1.4129798917912184, "learning_rate": 7.220803792535621e-06, "loss": 0.3657, "step": 3832 }, { "epoch": 0.37304136253041364, "grad_norm": 1.7023996330847022, "learning_rate": 7.2193915304752815e-06, "loss": 0.601, "step": 3833 }, { "epoch": 0.37313868613138684, "grad_norm": 1.587053329225552, "learning_rate": 7.2179790478726405e-06, "loss": 0.3858, "step": 3834 }, { "epoch": 0.3732360097323601, "grad_norm": 1.50243862921725, "learning_rate": 7.216566344868059e-06, "loss": 0.3792, "step": 3835 }, { "epoch": 0.37333333333333335, "grad_norm": 1.2996422990779706, "learning_rate": 7.215153421601917e-06, "loss": 0.3695, "step": 3836 }, { "epoch": 0.37343065693430655, "grad_norm": 1.2661927915794233, "learning_rate": 7.2137402782146185e-06, "loss": 0.2922, "step": 3837 }, { "epoch": 0.3735279805352798, "grad_norm": 1.342404468128034, "learning_rate": 7.212326914846587e-06, "loss": 0.3832, "step": 3838 }, { "epoch": 0.37362530413625306, "grad_norm": 1.3085065670293223, "learning_rate": 7.2109133316382716e-06, "loss": 0.3435, "step": 3839 }, { "epoch": 0.37372262773722625, "grad_norm": 1.4136536570943345, "learning_rate": 7.209499528730138e-06, "loss": 0.3996, "step": 3840 }, { "epoch": 0.3738199513381995, "grad_norm": 1.3042560709248439, "learning_rate": 7.208085506262679e-06, "loss": 0.3547, "step": 3841 }, { "epoch": 0.37391727493917276, "grad_norm": 1.1956805990066928, "learning_rate": 7.206671264376406e-06, "loss": 0.3349, "step": 3842 }, { "epoch": 0.37401459854014596, "grad_norm": 1.3725427866174449, "learning_rate": 7.205256803211852e-06, "loss": 0.3136, "step": 3843 }, { "epoch": 0.3741119221411192, "grad_norm": 1.1819946263218575, "learning_rate": 7.203842122909576e-06, "loss": 0.3854, "step": 3844 }, { "epoch": 0.37420924574209247, "grad_norm": 1.1749292879944346, "learning_rate": 7.202427223610153e-06, "loss": 0.2567, "step": 3845 }, { "epoch": 0.3743065693430657, "grad_norm": 1.6177760725542194, "learning_rate": 7.201012105454181e-06, "loss": 0.6999, "step": 3846 }, { "epoch": 0.3744038929440389, "grad_norm": 1.5266511327183672, "learning_rate": 7.199596768582284e-06, "loss": 0.5089, "step": 3847 }, { "epoch": 0.37450121654501217, "grad_norm": 1.351768037018499, "learning_rate": 7.198181213135107e-06, "loss": 0.3024, "step": 3848 }, { "epoch": 0.3745985401459854, "grad_norm": 2.0647138793608883, "learning_rate": 7.19676543925331e-06, "loss": 0.4637, "step": 3849 }, { "epoch": 0.3746958637469586, "grad_norm": 1.0901772593808003, "learning_rate": 7.19534944707758e-06, "loss": 0.3197, "step": 3850 }, { "epoch": 0.3747931873479319, "grad_norm": 1.5563466788905995, "learning_rate": 7.193933236748627e-06, "loss": 0.4692, "step": 3851 }, { "epoch": 0.37489051094890513, "grad_norm": 1.6383898814090063, "learning_rate": 7.192516808407179e-06, "loss": 0.4814, "step": 3852 }, { "epoch": 0.37498783454987833, "grad_norm": 1.234160440846945, "learning_rate": 7.191100162193989e-06, "loss": 0.4099, "step": 3853 }, { "epoch": 0.3750851581508516, "grad_norm": 1.5270305810050784, "learning_rate": 7.189683298249829e-06, "loss": 0.553, "step": 3854 }, { "epoch": 0.37518248175182484, "grad_norm": 1.4979140559754238, "learning_rate": 7.1882662167154935e-06, "loss": 0.2972, "step": 3855 }, { "epoch": 0.37527980535279803, "grad_norm": 1.4793577831895819, "learning_rate": 7.186848917731799e-06, "loss": 0.517, "step": 3856 }, { "epoch": 0.3753771289537713, "grad_norm": 1.1834990208552847, "learning_rate": 7.1854314014395836e-06, "loss": 0.2198, "step": 3857 }, { "epoch": 0.37547445255474454, "grad_norm": 1.4192969456874913, "learning_rate": 7.184013667979707e-06, "loss": 0.4349, "step": 3858 }, { "epoch": 0.37557177615571774, "grad_norm": 1.411758906981302, "learning_rate": 7.1825957174930495e-06, "loss": 0.6343, "step": 3859 }, { "epoch": 0.375669099756691, "grad_norm": 1.4732647759178685, "learning_rate": 7.181177550120514e-06, "loss": 0.4049, "step": 3860 }, { "epoch": 0.37576642335766425, "grad_norm": 1.3463082468649135, "learning_rate": 7.1797591660030245e-06, "loss": 0.4196, "step": 3861 }, { "epoch": 0.37586374695863745, "grad_norm": 1.2161261851692835, "learning_rate": 7.178340565281527e-06, "loss": 0.3326, "step": 3862 }, { "epoch": 0.3759610705596107, "grad_norm": 1.3015426377776436, "learning_rate": 7.176921748096987e-06, "loss": 0.3816, "step": 3863 }, { "epoch": 0.37605839416058395, "grad_norm": 1.2745689561042768, "learning_rate": 7.175502714590398e-06, "loss": 0.365, "step": 3864 }, { "epoch": 0.37615571776155715, "grad_norm": 1.3894630463948126, "learning_rate": 7.174083464902765e-06, "loss": 0.5022, "step": 3865 }, { "epoch": 0.3762530413625304, "grad_norm": 1.6621696201258571, "learning_rate": 7.172663999175123e-06, "loss": 0.6661, "step": 3866 }, { "epoch": 0.37635036496350366, "grad_norm": 1.6093508524147906, "learning_rate": 7.171244317548522e-06, "loss": 0.2752, "step": 3867 }, { "epoch": 0.3764476885644769, "grad_norm": 1.2470356123121078, "learning_rate": 7.16982442016404e-06, "loss": 0.2922, "step": 3868 }, { "epoch": 0.3765450121654501, "grad_norm": 1.4271789388815392, "learning_rate": 7.168404307162773e-06, "loss": 0.3537, "step": 3869 }, { "epoch": 0.37664233576642336, "grad_norm": 1.2732277445845157, "learning_rate": 7.166983978685835e-06, "loss": 0.2861, "step": 3870 }, { "epoch": 0.3767396593673966, "grad_norm": 1.5040576412741455, "learning_rate": 7.165563434874367e-06, "loss": 0.5636, "step": 3871 }, { "epoch": 0.3768369829683698, "grad_norm": 1.305952270015343, "learning_rate": 7.164142675869531e-06, "loss": 0.3189, "step": 3872 }, { "epoch": 0.37693430656934307, "grad_norm": 1.297847888444762, "learning_rate": 7.162721701812506e-06, "loss": 0.2904, "step": 3873 }, { "epoch": 0.3770316301703163, "grad_norm": 1.4659816543867576, "learning_rate": 7.161300512844496e-06, "loss": 0.2955, "step": 3874 }, { "epoch": 0.3771289537712895, "grad_norm": 1.2035904516565876, "learning_rate": 7.159879109106726e-06, "loss": 0.2557, "step": 3875 }, { "epoch": 0.3772262773722628, "grad_norm": 1.598650858223209, "learning_rate": 7.158457490740442e-06, "loss": 0.7335, "step": 3876 }, { "epoch": 0.37732360097323603, "grad_norm": 1.2305628180498833, "learning_rate": 7.157035657886911e-06, "loss": 0.2654, "step": 3877 }, { "epoch": 0.3774209245742092, "grad_norm": 1.709426282151801, "learning_rate": 7.1556136106874195e-06, "loss": 0.4445, "step": 3878 }, { "epoch": 0.3775182481751825, "grad_norm": 1.5920181588666038, "learning_rate": 7.154191349283278e-06, "loss": 0.4233, "step": 3879 }, { "epoch": 0.37761557177615573, "grad_norm": 1.4519282509492475, "learning_rate": 7.152768873815819e-06, "loss": 0.4399, "step": 3880 }, { "epoch": 0.37771289537712893, "grad_norm": 1.118109262781078, "learning_rate": 7.151346184426394e-06, "loss": 0.2138, "step": 3881 }, { "epoch": 0.3778102189781022, "grad_norm": 1.233018443573734, "learning_rate": 7.1499232812563765e-06, "loss": 0.266, "step": 3882 }, { "epoch": 0.37790754257907544, "grad_norm": 1.4646778063558124, "learning_rate": 7.148500164447159e-06, "loss": 0.4118, "step": 3883 }, { "epoch": 0.37800486618004864, "grad_norm": 1.1142467929519195, "learning_rate": 7.147076834140163e-06, "loss": 0.3422, "step": 3884 }, { "epoch": 0.3781021897810219, "grad_norm": 1.2249131016174075, "learning_rate": 7.145653290476821e-06, "loss": 0.3973, "step": 3885 }, { "epoch": 0.37819951338199514, "grad_norm": 1.3075094712827902, "learning_rate": 7.144229533598593e-06, "loss": 0.4029, "step": 3886 }, { "epoch": 0.37829683698296834, "grad_norm": 1.2831548162044333, "learning_rate": 7.142805563646957e-06, "loss": 0.3863, "step": 3887 }, { "epoch": 0.3783941605839416, "grad_norm": 0.8015397364189331, "learning_rate": 7.1413813807634144e-06, "loss": 0.1929, "step": 3888 }, { "epoch": 0.37849148418491485, "grad_norm": 2.1960941972871186, "learning_rate": 7.1399569850894886e-06, "loss": 0.4767, "step": 3889 }, { "epoch": 0.3785888077858881, "grad_norm": 1.4481987255363193, "learning_rate": 7.138532376766722e-06, "loss": 0.4454, "step": 3890 }, { "epoch": 0.3786861313868613, "grad_norm": 0.951708820286833, "learning_rate": 7.13710755593668e-06, "loss": 0.186, "step": 3891 }, { "epoch": 0.37878345498783456, "grad_norm": 1.6714364305481455, "learning_rate": 7.1356825227409455e-06, "loss": 0.6873, "step": 3892 }, { "epoch": 0.3788807785888078, "grad_norm": 2.6374385953146238, "learning_rate": 7.134257277321126e-06, "loss": 0.3606, "step": 3893 }, { "epoch": 0.378978102189781, "grad_norm": 1.326824044728447, "learning_rate": 7.13283181981885e-06, "loss": 0.2763, "step": 3894 }, { "epoch": 0.37907542579075426, "grad_norm": 1.3996167884606898, "learning_rate": 7.131406150375764e-06, "loss": 0.5331, "step": 3895 }, { "epoch": 0.3791727493917275, "grad_norm": 1.2228137027647799, "learning_rate": 7.129980269133539e-06, "loss": 0.27, "step": 3896 }, { "epoch": 0.3792700729927007, "grad_norm": 1.4657212540958298, "learning_rate": 7.128554176233865e-06, "loss": 0.3137, "step": 3897 }, { "epoch": 0.37936739659367397, "grad_norm": 1.9918813432796558, "learning_rate": 7.127127871818455e-06, "loss": 0.3948, "step": 3898 }, { "epoch": 0.3794647201946472, "grad_norm": 1.8850475673593161, "learning_rate": 7.12570135602904e-06, "loss": 0.775, "step": 3899 }, { "epoch": 0.3795620437956204, "grad_norm": 1.6679949077189995, "learning_rate": 7.124274629007375e-06, "loss": 0.6015, "step": 3900 }, { "epoch": 0.37965936739659367, "grad_norm": 1.2107596482139231, "learning_rate": 7.122847690895235e-06, "loss": 0.2573, "step": 3901 }, { "epoch": 0.3797566909975669, "grad_norm": 1.352463967342193, "learning_rate": 7.1214205418344155e-06, "loss": 0.428, "step": 3902 }, { "epoch": 0.3798540145985401, "grad_norm": 1.6246032230437741, "learning_rate": 7.1199931819667316e-06, "loss": 0.5207, "step": 3903 }, { "epoch": 0.3799513381995134, "grad_norm": 1.4207183828680439, "learning_rate": 7.118565611434023e-06, "loss": 0.3472, "step": 3904 }, { "epoch": 0.38004866180048663, "grad_norm": 1.1713460109772222, "learning_rate": 7.117137830378147e-06, "loss": 0.3043, "step": 3905 }, { "epoch": 0.38014598540145983, "grad_norm": 1.4955277530157316, "learning_rate": 7.115709838940983e-06, "loss": 0.5889, "step": 3906 }, { "epoch": 0.3802433090024331, "grad_norm": 1.4020682146561894, "learning_rate": 7.114281637264433e-06, "loss": 0.5088, "step": 3907 }, { "epoch": 0.38034063260340634, "grad_norm": 1.547124360918337, "learning_rate": 7.112853225490417e-06, "loss": 0.3316, "step": 3908 }, { "epoch": 0.38043795620437953, "grad_norm": 1.2645380509557882, "learning_rate": 7.111424603760877e-06, "loss": 0.4013, "step": 3909 }, { "epoch": 0.3805352798053528, "grad_norm": 1.7019140855865837, "learning_rate": 7.109995772217776e-06, "loss": 0.6348, "step": 3910 }, { "epoch": 0.38063260340632604, "grad_norm": 1.304725235261976, "learning_rate": 7.108566731003099e-06, "loss": 0.4496, "step": 3911 }, { "epoch": 0.3807299270072993, "grad_norm": 1.324816183619312, "learning_rate": 7.1071374802588496e-06, "loss": 0.3335, "step": 3912 }, { "epoch": 0.3808272506082725, "grad_norm": 1.3880021724436575, "learning_rate": 7.1057080201270535e-06, "loss": 0.46, "step": 3913 }, { "epoch": 0.38092457420924575, "grad_norm": 1.5324573817091165, "learning_rate": 7.104278350749757e-06, "loss": 0.4837, "step": 3914 }, { "epoch": 0.381021897810219, "grad_norm": 1.415341468021005, "learning_rate": 7.1028484722690275e-06, "loss": 0.2941, "step": 3915 }, { "epoch": 0.3811192214111922, "grad_norm": 1.2327883216411208, "learning_rate": 7.101418384826953e-06, "loss": 0.4483, "step": 3916 }, { "epoch": 0.38121654501216545, "grad_norm": 1.4959339823461342, "learning_rate": 7.099988088565642e-06, "loss": 0.4297, "step": 3917 }, { "epoch": 0.3813138686131387, "grad_norm": 1.5384484629808992, "learning_rate": 7.098557583627224e-06, "loss": 0.4326, "step": 3918 }, { "epoch": 0.3814111922141119, "grad_norm": 1.6679471659994731, "learning_rate": 7.097126870153849e-06, "loss": 0.5069, "step": 3919 }, { "epoch": 0.38150851581508516, "grad_norm": 1.45117080708974, "learning_rate": 7.095695948287686e-06, "loss": 0.3661, "step": 3920 }, { "epoch": 0.3816058394160584, "grad_norm": 1.4481947743840338, "learning_rate": 7.094264818170931e-06, "loss": 0.3961, "step": 3921 }, { "epoch": 0.3817031630170316, "grad_norm": 1.348596217427556, "learning_rate": 7.092833479945793e-06, "loss": 0.4029, "step": 3922 }, { "epoch": 0.38180048661800486, "grad_norm": 1.3981471692690721, "learning_rate": 7.091401933754507e-06, "loss": 0.3814, "step": 3923 }, { "epoch": 0.3818978102189781, "grad_norm": 1.8099756090617796, "learning_rate": 7.089970179739323e-06, "loss": 0.3998, "step": 3924 }, { "epoch": 0.3819951338199513, "grad_norm": 1.5103246288100645, "learning_rate": 7.088538218042519e-06, "loss": 0.5697, "step": 3925 }, { "epoch": 0.38209245742092457, "grad_norm": 1.1477205283158238, "learning_rate": 7.087106048806388e-06, "loss": 0.3023, "step": 3926 }, { "epoch": 0.3821897810218978, "grad_norm": 1.2535758370464407, "learning_rate": 7.085673672173247e-06, "loss": 0.3408, "step": 3927 }, { "epoch": 0.382287104622871, "grad_norm": 1.1952324136071744, "learning_rate": 7.0842410882854305e-06, "loss": 0.3339, "step": 3928 }, { "epoch": 0.3823844282238443, "grad_norm": 1.2651491565748119, "learning_rate": 7.082808297285296e-06, "loss": 0.3817, "step": 3929 }, { "epoch": 0.38248175182481753, "grad_norm": 1.5429938591954184, "learning_rate": 7.081375299315221e-06, "loss": 0.5274, "step": 3930 }, { "epoch": 0.3825790754257907, "grad_norm": 1.3843509715042235, "learning_rate": 7.0799420945176026e-06, "loss": 0.4354, "step": 3931 }, { "epoch": 0.382676399026764, "grad_norm": 1.2382754737887756, "learning_rate": 7.078508683034862e-06, "loss": 0.4135, "step": 3932 }, { "epoch": 0.38277372262773723, "grad_norm": 1.659344068335914, "learning_rate": 7.0770750650094335e-06, "loss": 0.357, "step": 3933 }, { "epoch": 0.3828710462287105, "grad_norm": 1.2935879886930641, "learning_rate": 7.0756412405837795e-06, "loss": 0.3475, "step": 3934 }, { "epoch": 0.3829683698296837, "grad_norm": 1.279920120428988, "learning_rate": 7.07420720990038e-06, "loss": 0.2811, "step": 3935 }, { "epoch": 0.38306569343065694, "grad_norm": 1.5715982667528452, "learning_rate": 7.072772973101735e-06, "loss": 0.2954, "step": 3936 }, { "epoch": 0.3831630170316302, "grad_norm": 1.4996379574324572, "learning_rate": 7.071338530330365e-06, "loss": 0.3162, "step": 3937 }, { "epoch": 0.3832603406326034, "grad_norm": 1.323656350948574, "learning_rate": 7.069903881728815e-06, "loss": 0.4149, "step": 3938 }, { "epoch": 0.38335766423357664, "grad_norm": 1.5522008591091048, "learning_rate": 7.068469027439642e-06, "loss": 0.5585, "step": 3939 }, { "epoch": 0.3834549878345499, "grad_norm": 1.6876876161618577, "learning_rate": 7.06703396760543e-06, "loss": 0.3946, "step": 3940 }, { "epoch": 0.3835523114355231, "grad_norm": 1.5052911896213828, "learning_rate": 7.065598702368782e-06, "loss": 0.5529, "step": 3941 }, { "epoch": 0.38364963503649635, "grad_norm": 1.7507247786327806, "learning_rate": 7.0641632318723205e-06, "loss": 0.6298, "step": 3942 }, { "epoch": 0.3837469586374696, "grad_norm": 1.295827327522188, "learning_rate": 7.062727556258693e-06, "loss": 0.3517, "step": 3943 }, { "epoch": 0.3838442822384428, "grad_norm": 1.037020059823786, "learning_rate": 7.061291675670557e-06, "loss": 0.2667, "step": 3944 }, { "epoch": 0.38394160583941606, "grad_norm": 1.4712371869692, "learning_rate": 7.059855590250604e-06, "loss": 0.4601, "step": 3945 }, { "epoch": 0.3840389294403893, "grad_norm": 1.4848893729981114, "learning_rate": 7.058419300141531e-06, "loss": 0.458, "step": 3946 }, { "epoch": 0.3841362530413625, "grad_norm": 1.3674844473838341, "learning_rate": 7.056982805486069e-06, "loss": 0.4278, "step": 3947 }, { "epoch": 0.38423357664233576, "grad_norm": 1.6438745197663538, "learning_rate": 7.055546106426961e-06, "loss": 0.6002, "step": 3948 }, { "epoch": 0.384330900243309, "grad_norm": 1.2362894591542046, "learning_rate": 7.054109203106974e-06, "loss": 0.3796, "step": 3949 }, { "epoch": 0.3844282238442822, "grad_norm": 1.6016145279039653, "learning_rate": 7.052672095668891e-06, "loss": 0.4956, "step": 3950 }, { "epoch": 0.38452554744525547, "grad_norm": 1.578296991928203, "learning_rate": 7.0512347842555205e-06, "loss": 0.3424, "step": 3951 }, { "epoch": 0.3846228710462287, "grad_norm": 1.1907848300937582, "learning_rate": 7.049797269009689e-06, "loss": 0.361, "step": 3952 }, { "epoch": 0.384720194647202, "grad_norm": 1.4905027295394417, "learning_rate": 7.048359550074244e-06, "loss": 0.4279, "step": 3953 }, { "epoch": 0.38481751824817517, "grad_norm": 1.4651231016184674, "learning_rate": 7.046921627592051e-06, "loss": 0.4622, "step": 3954 }, { "epoch": 0.3849148418491484, "grad_norm": 1.3954586503570294, "learning_rate": 7.045483501705997e-06, "loss": 0.2737, "step": 3955 }, { "epoch": 0.3850121654501217, "grad_norm": 1.4949563585883912, "learning_rate": 7.044045172558991e-06, "loss": 0.3092, "step": 3956 }, { "epoch": 0.3851094890510949, "grad_norm": 1.3655729515230581, "learning_rate": 7.042606640293958e-06, "loss": 0.4943, "step": 3957 }, { "epoch": 0.38520681265206813, "grad_norm": 1.350946010163059, "learning_rate": 7.04116790505385e-06, "loss": 0.5314, "step": 3958 }, { "epoch": 0.3853041362530414, "grad_norm": 1.3729520632050074, "learning_rate": 7.039728966981632e-06, "loss": 0.4056, "step": 3959 }, { "epoch": 0.3854014598540146, "grad_norm": 1.4800883184320073, "learning_rate": 7.038289826220292e-06, "loss": 0.4511, "step": 3960 }, { "epoch": 0.38549878345498784, "grad_norm": 1.4839457644194014, "learning_rate": 7.036850482912841e-06, "loss": 0.5634, "step": 3961 }, { "epoch": 0.3855961070559611, "grad_norm": 1.4722550146581384, "learning_rate": 7.035410937202303e-06, "loss": 0.4885, "step": 3962 }, { "epoch": 0.3856934306569343, "grad_norm": 1.385565264579537, "learning_rate": 7.033971189231731e-06, "loss": 0.4708, "step": 3963 }, { "epoch": 0.38579075425790754, "grad_norm": 1.4522732114770711, "learning_rate": 7.032531239144192e-06, "loss": 0.5378, "step": 3964 }, { "epoch": 0.3858880778588808, "grad_norm": 1.14748867569532, "learning_rate": 7.031091087082773e-06, "loss": 0.3676, "step": 3965 }, { "epoch": 0.385985401459854, "grad_norm": 1.3321032015253773, "learning_rate": 7.029650733190585e-06, "loss": 0.4896, "step": 3966 }, { "epoch": 0.38608272506082725, "grad_norm": 1.2135132498408774, "learning_rate": 7.028210177610755e-06, "loss": 0.3284, "step": 3967 }, { "epoch": 0.3861800486618005, "grad_norm": 1.5809800411366401, "learning_rate": 7.026769420486435e-06, "loss": 0.5651, "step": 3968 }, { "epoch": 0.3862773722627737, "grad_norm": 1.2932935231921396, "learning_rate": 7.025328461960791e-06, "loss": 0.3895, "step": 3969 }, { "epoch": 0.38637469586374695, "grad_norm": 1.1609928698062038, "learning_rate": 7.023887302177013e-06, "loss": 0.3102, "step": 3970 }, { "epoch": 0.3864720194647202, "grad_norm": 1.3129146011919008, "learning_rate": 7.022445941278308e-06, "loss": 0.3229, "step": 3971 }, { "epoch": 0.3865693430656934, "grad_norm": 1.5073708000931252, "learning_rate": 7.02100437940791e-06, "loss": 0.4405, "step": 3972 }, { "epoch": 0.38666666666666666, "grad_norm": 1.525639512470862, "learning_rate": 7.019562616709061e-06, "loss": 0.4644, "step": 3973 }, { "epoch": 0.3867639902676399, "grad_norm": 1.6237386062609962, "learning_rate": 7.018120653325037e-06, "loss": 0.6601, "step": 3974 }, { "epoch": 0.38686131386861317, "grad_norm": 1.345911062533185, "learning_rate": 7.016678489399121e-06, "loss": 0.5065, "step": 3975 }, { "epoch": 0.38695863746958636, "grad_norm": 1.514524079658759, "learning_rate": 7.015236125074626e-06, "loss": 0.2811, "step": 3976 }, { "epoch": 0.3870559610705596, "grad_norm": 1.4135796159763112, "learning_rate": 7.013793560494877e-06, "loss": 0.4894, "step": 3977 }, { "epoch": 0.38715328467153287, "grad_norm": 1.4104178653563566, "learning_rate": 7.012350795803223e-06, "loss": 0.5016, "step": 3978 }, { "epoch": 0.38725060827250607, "grad_norm": 1.3045531335042206, "learning_rate": 7.010907831143035e-06, "loss": 0.2089, "step": 3979 }, { "epoch": 0.3873479318734793, "grad_norm": 1.4042038760740407, "learning_rate": 7.009464666657701e-06, "loss": 0.3769, "step": 3980 }, { "epoch": 0.3874452554744526, "grad_norm": 1.479965245122337, "learning_rate": 7.008021302490626e-06, "loss": 0.4625, "step": 3981 }, { "epoch": 0.3875425790754258, "grad_norm": 1.4175283205139955, "learning_rate": 7.0065777387852405e-06, "loss": 0.3618, "step": 3982 }, { "epoch": 0.38763990267639903, "grad_norm": 1.5162882975673595, "learning_rate": 7.005133975684992e-06, "loss": 0.4722, "step": 3983 }, { "epoch": 0.3877372262773723, "grad_norm": 1.4933222106252568, "learning_rate": 7.003690013333348e-06, "loss": 0.2983, "step": 3984 }, { "epoch": 0.3878345498783455, "grad_norm": 1.3557382804846623, "learning_rate": 7.002245851873794e-06, "loss": 0.5318, "step": 3985 }, { "epoch": 0.38793187347931873, "grad_norm": 1.167640461283426, "learning_rate": 7.000801491449843e-06, "loss": 0.2841, "step": 3986 }, { "epoch": 0.388029197080292, "grad_norm": 1.4266522816945901, "learning_rate": 6.9993569322050145e-06, "loss": 0.492, "step": 3987 }, { "epoch": 0.3881265206812652, "grad_norm": 1.4821564005694208, "learning_rate": 6.997912174282859e-06, "loss": 0.4676, "step": 3988 }, { "epoch": 0.38822384428223844, "grad_norm": 1.2497816848843788, "learning_rate": 6.996467217826944e-06, "loss": 0.3818, "step": 3989 }, { "epoch": 0.3883211678832117, "grad_norm": 1.3419552727592317, "learning_rate": 6.995022062980854e-06, "loss": 0.3393, "step": 3990 }, { "epoch": 0.3884184914841849, "grad_norm": 1.8154719749479167, "learning_rate": 6.993576709888196e-06, "loss": 0.4484, "step": 3991 }, { "epoch": 0.38851581508515814, "grad_norm": 1.351000409855417, "learning_rate": 6.992131158692594e-06, "loss": 0.4945, "step": 3992 }, { "epoch": 0.3886131386861314, "grad_norm": 1.6363889229872557, "learning_rate": 6.9906854095376946e-06, "loss": 0.5338, "step": 3993 }, { "epoch": 0.3887104622871046, "grad_norm": 1.7802129035237775, "learning_rate": 6.989239462567162e-06, "loss": 0.3984, "step": 3994 }, { "epoch": 0.38880778588807785, "grad_norm": 1.5793157183409696, "learning_rate": 6.987793317924683e-06, "loss": 0.3973, "step": 3995 }, { "epoch": 0.3889051094890511, "grad_norm": 1.5416565542490828, "learning_rate": 6.986346975753958e-06, "loss": 0.5045, "step": 3996 }, { "epoch": 0.38900243309002436, "grad_norm": 0.8915263517146853, "learning_rate": 6.984900436198715e-06, "loss": 0.1697, "step": 3997 }, { "epoch": 0.38909975669099756, "grad_norm": 1.5546582089832586, "learning_rate": 6.983453699402695e-06, "loss": 0.4891, "step": 3998 }, { "epoch": 0.3891970802919708, "grad_norm": 1.5087179519043437, "learning_rate": 6.9820067655096615e-06, "loss": 0.526, "step": 3999 }, { "epoch": 0.38929440389294406, "grad_norm": 1.5145233712620612, "learning_rate": 6.980559634663397e-06, "loss": 0.4787, "step": 4000 }, { "epoch": 0.38939172749391726, "grad_norm": 1.418350630690952, "learning_rate": 6.979112307007705e-06, "loss": 0.5409, "step": 4001 }, { "epoch": 0.3894890510948905, "grad_norm": 1.6433890174783798, "learning_rate": 6.977664782686406e-06, "loss": 0.4894, "step": 4002 }, { "epoch": 0.38958637469586377, "grad_norm": 1.1028189021006047, "learning_rate": 6.976217061843343e-06, "loss": 0.2523, "step": 4003 }, { "epoch": 0.38968369829683697, "grad_norm": 1.1910178630203534, "learning_rate": 6.974769144622374e-06, "loss": 0.261, "step": 4004 }, { "epoch": 0.3897810218978102, "grad_norm": 1.416321272912632, "learning_rate": 6.9733210311673826e-06, "loss": 0.3422, "step": 4005 }, { "epoch": 0.3898783454987835, "grad_norm": 1.1678284015343599, "learning_rate": 6.971872721622268e-06, "loss": 0.2577, "step": 4006 }, { "epoch": 0.38997566909975667, "grad_norm": 1.6751662328419257, "learning_rate": 6.970424216130949e-06, "loss": 0.4419, "step": 4007 }, { "epoch": 0.3900729927007299, "grad_norm": 1.3122627861372418, "learning_rate": 6.968975514837364e-06, "loss": 0.3431, "step": 4008 }, { "epoch": 0.3901703163017032, "grad_norm": 1.4908254908438412, "learning_rate": 6.967526617885471e-06, "loss": 0.5071, "step": 4009 }, { "epoch": 0.3902676399026764, "grad_norm": 1.273008168870797, "learning_rate": 6.966077525419249e-06, "loss": 0.3637, "step": 4010 }, { "epoch": 0.39036496350364963, "grad_norm": 1.8505049634947817, "learning_rate": 6.964628237582696e-06, "loss": 0.6389, "step": 4011 }, { "epoch": 0.3904622871046229, "grad_norm": 1.350943274678673, "learning_rate": 6.963178754519826e-06, "loss": 0.3458, "step": 4012 }, { "epoch": 0.3905596107055961, "grad_norm": 1.6230761069776574, "learning_rate": 6.961729076374679e-06, "loss": 0.5931, "step": 4013 }, { "epoch": 0.39065693430656934, "grad_norm": 1.4687261376573049, "learning_rate": 6.960279203291305e-06, "loss": 0.5103, "step": 4014 }, { "epoch": 0.3907542579075426, "grad_norm": 2.279060462648419, "learning_rate": 6.958829135413783e-06, "loss": 0.3421, "step": 4015 }, { "epoch": 0.3908515815085158, "grad_norm": 1.4453991838183962, "learning_rate": 6.957378872886205e-06, "loss": 0.4648, "step": 4016 }, { "epoch": 0.39094890510948904, "grad_norm": 1.2752498064098947, "learning_rate": 6.955928415852686e-06, "loss": 0.2475, "step": 4017 }, { "epoch": 0.3910462287104623, "grad_norm": 1.3162378944934356, "learning_rate": 6.954477764457359e-06, "loss": 0.5026, "step": 4018 }, { "epoch": 0.39114355231143555, "grad_norm": 1.4455647016550808, "learning_rate": 6.953026918844375e-06, "loss": 0.3693, "step": 4019 }, { "epoch": 0.39124087591240875, "grad_norm": 1.7197570624779743, "learning_rate": 6.951575879157904e-06, "loss": 0.5467, "step": 4020 }, { "epoch": 0.391338199513382, "grad_norm": 1.3683763838921565, "learning_rate": 6.950124645542139e-06, "loss": 0.3125, "step": 4021 }, { "epoch": 0.39143552311435525, "grad_norm": 1.3684386713615748, "learning_rate": 6.948673218141291e-06, "loss": 0.4659, "step": 4022 }, { "epoch": 0.39153284671532845, "grad_norm": 2.7578384354968497, "learning_rate": 6.947221597099585e-06, "loss": 0.4887, "step": 4023 }, { "epoch": 0.3916301703163017, "grad_norm": 1.331068524451763, "learning_rate": 6.945769782561273e-06, "loss": 0.3888, "step": 4024 }, { "epoch": 0.39172749391727496, "grad_norm": 1.7490396589403405, "learning_rate": 6.944317774670622e-06, "loss": 0.3748, "step": 4025 }, { "epoch": 0.39182481751824816, "grad_norm": 1.4248992281969828, "learning_rate": 6.942865573571919e-06, "loss": 0.3915, "step": 4026 }, { "epoch": 0.3919221411192214, "grad_norm": 1.485964355867862, "learning_rate": 6.941413179409468e-06, "loss": 0.3346, "step": 4027 }, { "epoch": 0.39201946472019467, "grad_norm": 1.5466928085770633, "learning_rate": 6.939960592327599e-06, "loss": 0.5374, "step": 4028 }, { "epoch": 0.39211678832116786, "grad_norm": 1.5883887378893773, "learning_rate": 6.938507812470652e-06, "loss": 0.5226, "step": 4029 }, { "epoch": 0.3922141119221411, "grad_norm": 2.786093191780367, "learning_rate": 6.937054839982993e-06, "loss": 0.2959, "step": 4030 }, { "epoch": 0.39231143552311437, "grad_norm": 1.3276365840641022, "learning_rate": 6.935601675009003e-06, "loss": 0.4711, "step": 4031 }, { "epoch": 0.39240875912408757, "grad_norm": 1.289716288913328, "learning_rate": 6.934148317693083e-06, "loss": 0.2954, "step": 4032 }, { "epoch": 0.3925060827250608, "grad_norm": 1.44676589060076, "learning_rate": 6.932694768179659e-06, "loss": 0.326, "step": 4033 }, { "epoch": 0.3926034063260341, "grad_norm": 1.6207833969674215, "learning_rate": 6.9312410266131665e-06, "loss": 0.4315, "step": 4034 }, { "epoch": 0.3927007299270073, "grad_norm": 1.5436789121746861, "learning_rate": 6.929787093138067e-06, "loss": 0.249, "step": 4035 }, { "epoch": 0.39279805352798053, "grad_norm": 1.3498642235046783, "learning_rate": 6.9283329678988375e-06, "loss": 0.4106, "step": 4036 }, { "epoch": 0.3928953771289538, "grad_norm": 1.3134123038417191, "learning_rate": 6.926878651039975e-06, "loss": 0.3761, "step": 4037 }, { "epoch": 0.392992700729927, "grad_norm": 1.4258818290455284, "learning_rate": 6.925424142705997e-06, "loss": 0.3464, "step": 4038 }, { "epoch": 0.39309002433090023, "grad_norm": 1.4508029884759852, "learning_rate": 6.92396944304144e-06, "loss": 0.3037, "step": 4039 }, { "epoch": 0.3931873479318735, "grad_norm": 2.156543435295907, "learning_rate": 6.922514552190856e-06, "loss": 0.6332, "step": 4040 }, { "epoch": 0.39328467153284674, "grad_norm": 1.6813909365209494, "learning_rate": 6.921059470298819e-06, "loss": 0.7023, "step": 4041 }, { "epoch": 0.39338199513381994, "grad_norm": 1.3183413808089122, "learning_rate": 6.91960419750992e-06, "loss": 0.4137, "step": 4042 }, { "epoch": 0.3934793187347932, "grad_norm": 1.626376595990447, "learning_rate": 6.918148733968774e-06, "loss": 0.5167, "step": 4043 }, { "epoch": 0.39357664233576645, "grad_norm": 1.3584619722516016, "learning_rate": 6.916693079820009e-06, "loss": 0.4243, "step": 4044 }, { "epoch": 0.39367396593673964, "grad_norm": 2.692105804461361, "learning_rate": 6.915237235208274e-06, "loss": 0.2159, "step": 4045 }, { "epoch": 0.3937712895377129, "grad_norm": 1.4379984779190689, "learning_rate": 6.913781200278239e-06, "loss": 0.3612, "step": 4046 }, { "epoch": 0.39386861313868615, "grad_norm": 1.2761654800240798, "learning_rate": 6.9123249751745866e-06, "loss": 0.3696, "step": 4047 }, { "epoch": 0.39396593673965935, "grad_norm": 1.0634612055178503, "learning_rate": 6.91086856004203e-06, "loss": 0.3053, "step": 4048 }, { "epoch": 0.3940632603406326, "grad_norm": 1.3282406027353941, "learning_rate": 6.90941195502529e-06, "loss": 0.3465, "step": 4049 }, { "epoch": 0.39416058394160586, "grad_norm": 1.1979553824540066, "learning_rate": 6.907955160269107e-06, "loss": 0.3624, "step": 4050 }, { "epoch": 0.39425790754257906, "grad_norm": 1.479713025623162, "learning_rate": 6.90649817591825e-06, "loss": 0.5207, "step": 4051 }, { "epoch": 0.3943552311435523, "grad_norm": 1.2910853380088252, "learning_rate": 6.905041002117494e-06, "loss": 0.3805, "step": 4052 }, { "epoch": 0.39445255474452556, "grad_norm": 1.322518756928142, "learning_rate": 6.903583639011647e-06, "loss": 0.3741, "step": 4053 }, { "epoch": 0.39454987834549876, "grad_norm": 1.388861062512862, "learning_rate": 6.902126086745521e-06, "loss": 0.3978, "step": 4054 }, { "epoch": 0.394647201946472, "grad_norm": 1.431056050315178, "learning_rate": 6.900668345463958e-06, "loss": 0.4779, "step": 4055 }, { "epoch": 0.39474452554744527, "grad_norm": 1.3089722954111018, "learning_rate": 6.8992104153118124e-06, "loss": 0.2481, "step": 4056 }, { "epoch": 0.39484184914841847, "grad_norm": 1.571119717801153, "learning_rate": 6.8977522964339596e-06, "loss": 0.422, "step": 4057 }, { "epoch": 0.3949391727493917, "grad_norm": 1.6272627408819231, "learning_rate": 6.896293988975297e-06, "loss": 0.5442, "step": 4058 }, { "epoch": 0.395036496350365, "grad_norm": 1.5163652637569427, "learning_rate": 6.894835493080733e-06, "loss": 0.4726, "step": 4059 }, { "epoch": 0.39513381995133817, "grad_norm": 2.0784976022874897, "learning_rate": 6.8933768088952025e-06, "loss": 0.4263, "step": 4060 }, { "epoch": 0.3952311435523114, "grad_norm": 1.7443301449253348, "learning_rate": 6.8919179365636546e-06, "loss": 0.3929, "step": 4061 }, { "epoch": 0.3953284671532847, "grad_norm": 1.7924502086241416, "learning_rate": 6.8904588762310586e-06, "loss": 0.5579, "step": 4062 }, { "epoch": 0.39542579075425793, "grad_norm": 1.0193876552673304, "learning_rate": 6.888999628042401e-06, "loss": 0.2416, "step": 4063 }, { "epoch": 0.39552311435523113, "grad_norm": 1.341170474322962, "learning_rate": 6.887540192142691e-06, "loss": 0.3074, "step": 4064 }, { "epoch": 0.3956204379562044, "grad_norm": 1.8133739469107961, "learning_rate": 6.88608056867695e-06, "loss": 0.6511, "step": 4065 }, { "epoch": 0.39571776155717764, "grad_norm": 1.6797513727862947, "learning_rate": 6.884620757790226e-06, "loss": 0.5998, "step": 4066 }, { "epoch": 0.39581508515815084, "grad_norm": 1.3474983454536655, "learning_rate": 6.883160759627577e-06, "loss": 0.4278, "step": 4067 }, { "epoch": 0.3959124087591241, "grad_norm": 1.2209023274964024, "learning_rate": 6.881700574334087e-06, "loss": 0.2868, "step": 4068 }, { "epoch": 0.39600973236009734, "grad_norm": 1.5229304325648796, "learning_rate": 6.880240202054854e-06, "loss": 0.4164, "step": 4069 }, { "epoch": 0.39610705596107054, "grad_norm": 1.3625612735094477, "learning_rate": 6.878779642934996e-06, "loss": 0.3048, "step": 4070 }, { "epoch": 0.3962043795620438, "grad_norm": 1.4190083205534945, "learning_rate": 6.8773188971196515e-06, "loss": 0.385, "step": 4071 }, { "epoch": 0.39630170316301705, "grad_norm": 1.3525245197492086, "learning_rate": 6.875857964753973e-06, "loss": 0.3608, "step": 4072 }, { "epoch": 0.39639902676399025, "grad_norm": 1.4522778082083179, "learning_rate": 6.874396845983134e-06, "loss": 0.5594, "step": 4073 }, { "epoch": 0.3964963503649635, "grad_norm": 1.4455429305856882, "learning_rate": 6.87293554095233e-06, "loss": 0.4933, "step": 4074 }, { "epoch": 0.39659367396593675, "grad_norm": 1.4604158337092326, "learning_rate": 6.871474049806771e-06, "loss": 0.5305, "step": 4075 }, { "epoch": 0.39669099756690995, "grad_norm": 1.3489977365676973, "learning_rate": 6.870012372691685e-06, "loss": 0.4778, "step": 4076 }, { "epoch": 0.3967883211678832, "grad_norm": 1.356695757623522, "learning_rate": 6.86855050975232e-06, "loss": 0.3774, "step": 4077 }, { "epoch": 0.39688564476885646, "grad_norm": 1.1716870983796823, "learning_rate": 6.867088461133941e-06, "loss": 0.3492, "step": 4078 }, { "epoch": 0.39698296836982966, "grad_norm": 1.478028392255781, "learning_rate": 6.865626226981834e-06, "loss": 0.4141, "step": 4079 }, { "epoch": 0.3970802919708029, "grad_norm": 1.239501688643302, "learning_rate": 6.864163807441304e-06, "loss": 0.3388, "step": 4080 }, { "epoch": 0.39717761557177617, "grad_norm": 1.5351372249288278, "learning_rate": 6.86270120265767e-06, "loss": 0.5215, "step": 4081 }, { "epoch": 0.3972749391727494, "grad_norm": 1.382749480627249, "learning_rate": 6.861238412776272e-06, "loss": 0.5118, "step": 4082 }, { "epoch": 0.3973722627737226, "grad_norm": 1.3595228443646563, "learning_rate": 6.8597754379424695e-06, "loss": 0.3972, "step": 4083 }, { "epoch": 0.39746958637469587, "grad_norm": 1.9244235129217742, "learning_rate": 6.858312278301638e-06, "loss": 0.4423, "step": 4084 }, { "epoch": 0.3975669099756691, "grad_norm": 1.4893887621731592, "learning_rate": 6.856848933999174e-06, "loss": 0.4281, "step": 4085 }, { "epoch": 0.3976642335766423, "grad_norm": 1.159936646507266, "learning_rate": 6.85538540518049e-06, "loss": 0.2303, "step": 4086 }, { "epoch": 0.3977615571776156, "grad_norm": 1.5864417866949991, "learning_rate": 6.853921691991018e-06, "loss": 0.301, "step": 4087 }, { "epoch": 0.39785888077858883, "grad_norm": 1.2139649969393456, "learning_rate": 6.852457794576207e-06, "loss": 0.3066, "step": 4088 }, { "epoch": 0.39795620437956203, "grad_norm": 1.4984653887972654, "learning_rate": 6.850993713081527e-06, "loss": 0.4157, "step": 4089 }, { "epoch": 0.3980535279805353, "grad_norm": 1.3362834103353325, "learning_rate": 6.8495294476524636e-06, "loss": 0.2316, "step": 4090 }, { "epoch": 0.39815085158150854, "grad_norm": 1.4160149515084606, "learning_rate": 6.848064998434523e-06, "loss": 0.4297, "step": 4091 }, { "epoch": 0.39824817518248173, "grad_norm": 1.4266675728649458, "learning_rate": 6.846600365573226e-06, "loss": 0.3893, "step": 4092 }, { "epoch": 0.398345498783455, "grad_norm": 1.4688794505110578, "learning_rate": 6.845135549214117e-06, "loss": 0.2136, "step": 4093 }, { "epoch": 0.39844282238442824, "grad_norm": 1.3580044820287391, "learning_rate": 6.843670549502755e-06, "loss": 0.335, "step": 4094 }, { "epoch": 0.39854014598540144, "grad_norm": 1.4677889500126287, "learning_rate": 6.842205366584716e-06, "loss": 0.1751, "step": 4095 }, { "epoch": 0.3986374695863747, "grad_norm": 1.3678338838909818, "learning_rate": 6.840740000605598e-06, "loss": 0.4195, "step": 4096 }, { "epoch": 0.39873479318734795, "grad_norm": 1.611968238148494, "learning_rate": 6.8392744517110135e-06, "loss": 0.4716, "step": 4097 }, { "epoch": 0.39883211678832114, "grad_norm": 1.2798081635725236, "learning_rate": 6.837808720046598e-06, "loss": 0.2324, "step": 4098 }, { "epoch": 0.3989294403892944, "grad_norm": 1.2301645538697144, "learning_rate": 6.836342805758e-06, "loss": 0.3178, "step": 4099 }, { "epoch": 0.39902676399026765, "grad_norm": 1.3243492759809474, "learning_rate": 6.834876708990887e-06, "loss": 0.3202, "step": 4100 }, { "epoch": 0.39912408759124085, "grad_norm": 1.6002987225145617, "learning_rate": 6.833410429890948e-06, "loss": 0.3685, "step": 4101 }, { "epoch": 0.3992214111922141, "grad_norm": 1.3678479152711107, "learning_rate": 6.8319439686038905e-06, "loss": 0.4836, "step": 4102 }, { "epoch": 0.39931873479318736, "grad_norm": 1.4215810283225752, "learning_rate": 6.830477325275432e-06, "loss": 0.433, "step": 4103 }, { "epoch": 0.3994160583941606, "grad_norm": 1.2240968936921577, "learning_rate": 6.829010500051319e-06, "loss": 0.3181, "step": 4104 }, { "epoch": 0.3995133819951338, "grad_norm": 1.3093225983032648, "learning_rate": 6.8275434930773065e-06, "loss": 0.3464, "step": 4105 }, { "epoch": 0.39961070559610706, "grad_norm": 1.3776375009966235, "learning_rate": 6.826076304499174e-06, "loss": 0.4843, "step": 4106 }, { "epoch": 0.3997080291970803, "grad_norm": 1.2816280179506763, "learning_rate": 6.8246089344627174e-06, "loss": 0.3877, "step": 4107 }, { "epoch": 0.3998053527980535, "grad_norm": 1.739257469798359, "learning_rate": 6.823141383113748e-06, "loss": 0.5034, "step": 4108 }, { "epoch": 0.39990267639902677, "grad_norm": 1.433615286766659, "learning_rate": 6.8216736505981e-06, "loss": 0.456, "step": 4109 }, { "epoch": 0.4, "grad_norm": 1.352765027942841, "learning_rate": 6.820205737061621e-06, "loss": 0.4045, "step": 4110 }, { "epoch": 0.4000973236009732, "grad_norm": 1.5427269854846495, "learning_rate": 6.8187376426501795e-06, "loss": 0.5184, "step": 4111 }, { "epoch": 0.4001946472019465, "grad_norm": 1.3420595581039498, "learning_rate": 6.81726936750966e-06, "loss": 0.4387, "step": 4112 }, { "epoch": 0.4002919708029197, "grad_norm": 1.7226489838946462, "learning_rate": 6.815800911785968e-06, "loss": 0.6075, "step": 4113 }, { "epoch": 0.4003892944038929, "grad_norm": 1.4330949045341035, "learning_rate": 6.814332275625024e-06, "loss": 0.4566, "step": 4114 }, { "epoch": 0.4004866180048662, "grad_norm": 1.4735337129897166, "learning_rate": 6.812863459172765e-06, "loss": 0.3747, "step": 4115 }, { "epoch": 0.40058394160583943, "grad_norm": 1.1425351387480063, "learning_rate": 6.811394462575149e-06, "loss": 0.2628, "step": 4116 }, { "epoch": 0.40068126520681263, "grad_norm": 1.43999339592267, "learning_rate": 6.809925285978152e-06, "loss": 0.5026, "step": 4117 }, { "epoch": 0.4007785888077859, "grad_norm": 1.4172325219654425, "learning_rate": 6.808455929527768e-06, "loss": 0.4025, "step": 4118 }, { "epoch": 0.40087591240875914, "grad_norm": 1.7910927659189848, "learning_rate": 6.806986393370006e-06, "loss": 0.6098, "step": 4119 }, { "epoch": 0.40097323600973234, "grad_norm": 1.2075775786568461, "learning_rate": 6.805516677650896e-06, "loss": 0.3196, "step": 4120 }, { "epoch": 0.4010705596107056, "grad_norm": 1.211944505922398, "learning_rate": 6.804046782516483e-06, "loss": 0.3276, "step": 4121 }, { "epoch": 0.40116788321167884, "grad_norm": 1.4396684296663345, "learning_rate": 6.802576708112834e-06, "loss": 0.3646, "step": 4122 }, { "epoch": 0.40126520681265204, "grad_norm": 1.3856408026331406, "learning_rate": 6.801106454586028e-06, "loss": 0.4237, "step": 4123 }, { "epoch": 0.4013625304136253, "grad_norm": 1.3806721993456297, "learning_rate": 6.799636022082168e-06, "loss": 0.3891, "step": 4124 }, { "epoch": 0.40145985401459855, "grad_norm": 2.5197331783786487, "learning_rate": 6.79816541074737e-06, "loss": 0.373, "step": 4125 }, { "epoch": 0.4015571776155718, "grad_norm": 1.619394611459261, "learning_rate": 6.796694620727768e-06, "loss": 0.4296, "step": 4126 }, { "epoch": 0.401654501216545, "grad_norm": 1.462883942898207, "learning_rate": 6.795223652169519e-06, "loss": 0.3997, "step": 4127 }, { "epoch": 0.40175182481751825, "grad_norm": 1.5479315030125844, "learning_rate": 6.793752505218791e-06, "loss": 0.3695, "step": 4128 }, { "epoch": 0.4018491484184915, "grad_norm": 1.6944874808399704, "learning_rate": 6.792281180021776e-06, "loss": 0.5353, "step": 4129 }, { "epoch": 0.4019464720194647, "grad_norm": 1.4648916821584979, "learning_rate": 6.790809676724677e-06, "loss": 0.5375, "step": 4130 }, { "epoch": 0.40204379562043796, "grad_norm": 1.9179370512353164, "learning_rate": 6.7893379954737195e-06, "loss": 0.5927, "step": 4131 }, { "epoch": 0.4021411192214112, "grad_norm": 1.7096259956264923, "learning_rate": 6.787866136415148e-06, "loss": 0.2986, "step": 4132 }, { "epoch": 0.4022384428223844, "grad_norm": 1.5884427732942605, "learning_rate": 6.786394099695217e-06, "loss": 0.3867, "step": 4133 }, { "epoch": 0.40233576642335767, "grad_norm": 1.2109930210306181, "learning_rate": 6.784921885460207e-06, "loss": 0.2347, "step": 4134 }, { "epoch": 0.4024330900243309, "grad_norm": 1.675584901630843, "learning_rate": 6.783449493856412e-06, "loss": 0.3507, "step": 4135 }, { "epoch": 0.4025304136253041, "grad_norm": 0.9536842206516954, "learning_rate": 6.781976925030145e-06, "loss": 0.1722, "step": 4136 }, { "epoch": 0.40262773722627737, "grad_norm": 1.3099520338173072, "learning_rate": 6.780504179127735e-06, "loss": 0.2402, "step": 4137 }, { "epoch": 0.4027250608272506, "grad_norm": 1.3666225901140812, "learning_rate": 6.779031256295532e-06, "loss": 0.4447, "step": 4138 }, { "epoch": 0.4028223844282238, "grad_norm": 1.3539060879909202, "learning_rate": 6.777558156679898e-06, "loss": 0.2859, "step": 4139 }, { "epoch": 0.4029197080291971, "grad_norm": 1.3557388080611188, "learning_rate": 6.7760848804272184e-06, "loss": 0.4089, "step": 4140 }, { "epoch": 0.40301703163017033, "grad_norm": 2.077118594049834, "learning_rate": 6.774611427683891e-06, "loss": 0.4151, "step": 4141 }, { "epoch": 0.40311435523114353, "grad_norm": 1.5204722964451547, "learning_rate": 6.773137798596336e-06, "loss": 0.385, "step": 4142 }, { "epoch": 0.4032116788321168, "grad_norm": 1.2910878309871092, "learning_rate": 6.77166399331099e-06, "loss": 0.3261, "step": 4143 }, { "epoch": 0.40330900243309004, "grad_norm": 1.5435785947461726, "learning_rate": 6.770190011974302e-06, "loss": 0.487, "step": 4144 }, { "epoch": 0.40340632603406323, "grad_norm": 1.4881633401455383, "learning_rate": 6.768715854732745e-06, "loss": 0.4287, "step": 4145 }, { "epoch": 0.4035036496350365, "grad_norm": 1.7748194616136683, "learning_rate": 6.767241521732806e-06, "loss": 0.4205, "step": 4146 }, { "epoch": 0.40360097323600974, "grad_norm": 2.512912685985909, "learning_rate": 6.76576701312099e-06, "loss": 0.466, "step": 4147 }, { "epoch": 0.403698296836983, "grad_norm": 1.143470246335317, "learning_rate": 6.7642923290438215e-06, "loss": 0.2462, "step": 4148 }, { "epoch": 0.4037956204379562, "grad_norm": 1.8432461324879108, "learning_rate": 6.76281746964784e-06, "loss": 0.3258, "step": 4149 }, { "epoch": 0.40389294403892945, "grad_norm": 1.4157626221983473, "learning_rate": 6.761342435079604e-06, "loss": 0.3768, "step": 4150 }, { "epoch": 0.4039902676399027, "grad_norm": 1.47964929936211, "learning_rate": 6.7598672254856864e-06, "loss": 0.3548, "step": 4151 }, { "epoch": 0.4040875912408759, "grad_norm": 1.3968724705205968, "learning_rate": 6.75839184101268e-06, "loss": 0.4182, "step": 4152 }, { "epoch": 0.40418491484184915, "grad_norm": 1.529068307372447, "learning_rate": 6.7569162818071975e-06, "loss": 0.3009, "step": 4153 }, { "epoch": 0.4042822384428224, "grad_norm": 1.6397759556670952, "learning_rate": 6.755440548015864e-06, "loss": 0.4027, "step": 4154 }, { "epoch": 0.4043795620437956, "grad_norm": 1.7545455752932866, "learning_rate": 6.753964639785322e-06, "loss": 0.4641, "step": 4155 }, { "epoch": 0.40447688564476886, "grad_norm": 1.4464207704525447, "learning_rate": 6.752488557262239e-06, "loss": 0.4019, "step": 4156 }, { "epoch": 0.4045742092457421, "grad_norm": 1.241485877391119, "learning_rate": 6.7510123005932885e-06, "loss": 0.38, "step": 4157 }, { "epoch": 0.4046715328467153, "grad_norm": 1.5953709456211367, "learning_rate": 6.74953586992517e-06, "loss": 0.3787, "step": 4158 }, { "epoch": 0.40476885644768856, "grad_norm": 1.5557586966812027, "learning_rate": 6.748059265404598e-06, "loss": 0.5311, "step": 4159 }, { "epoch": 0.4048661800486618, "grad_norm": 1.3580123385569711, "learning_rate": 6.746582487178299e-06, "loss": 0.3035, "step": 4160 }, { "epoch": 0.404963503649635, "grad_norm": 1.4942228805738602, "learning_rate": 6.745105535393029e-06, "loss": 0.6116, "step": 4161 }, { "epoch": 0.40506082725060827, "grad_norm": 1.3580092661770409, "learning_rate": 6.7436284101955465e-06, "loss": 0.3137, "step": 4162 }, { "epoch": 0.4051581508515815, "grad_norm": 1.8589871426774351, "learning_rate": 6.7421511117326376e-06, "loss": 0.6386, "step": 4163 }, { "epoch": 0.4052554744525547, "grad_norm": 1.437956695986945, "learning_rate": 6.740673640151102e-06, "loss": 0.4982, "step": 4164 }, { "epoch": 0.405352798053528, "grad_norm": 1.535934910815706, "learning_rate": 6.739195995597757e-06, "loss": 0.4673, "step": 4165 }, { "epoch": 0.4054501216545012, "grad_norm": 1.5681489682180827, "learning_rate": 6.737718178219437e-06, "loss": 0.5436, "step": 4166 }, { "epoch": 0.4055474452554744, "grad_norm": 1.6606402959323747, "learning_rate": 6.736240188162995e-06, "loss": 0.5384, "step": 4167 }, { "epoch": 0.4056447688564477, "grad_norm": 1.4296428068919353, "learning_rate": 6.7347620255752955e-06, "loss": 0.5502, "step": 4168 }, { "epoch": 0.40574209245742093, "grad_norm": 1.1693215589274866, "learning_rate": 6.733283690603228e-06, "loss": 0.3637, "step": 4169 }, { "epoch": 0.4058394160583942, "grad_norm": 1.7804220182572144, "learning_rate": 6.731805183393696e-06, "loss": 0.3001, "step": 4170 }, { "epoch": 0.4059367396593674, "grad_norm": 1.3982681939127966, "learning_rate": 6.7303265040936185e-06, "loss": 0.3245, "step": 4171 }, { "epoch": 0.40603406326034064, "grad_norm": 1.2062326400377312, "learning_rate": 6.728847652849933e-06, "loss": 0.2714, "step": 4172 }, { "epoch": 0.4061313868613139, "grad_norm": 1.4699173204694587, "learning_rate": 6.727368629809592e-06, "loss": 0.5233, "step": 4173 }, { "epoch": 0.4062287104622871, "grad_norm": 1.9308275569367828, "learning_rate": 6.725889435119568e-06, "loss": 0.4341, "step": 4174 }, { "epoch": 0.40632603406326034, "grad_norm": 1.2643530305470765, "learning_rate": 6.724410068926852e-06, "loss": 0.282, "step": 4175 }, { "epoch": 0.4064233576642336, "grad_norm": 1.5309913572519684, "learning_rate": 6.722930531378446e-06, "loss": 0.3865, "step": 4176 }, { "epoch": 0.4065206812652068, "grad_norm": 1.394521077436856, "learning_rate": 6.721450822621376e-06, "loss": 0.3337, "step": 4177 }, { "epoch": 0.40661800486618005, "grad_norm": 1.445225810982358, "learning_rate": 6.719970942802678e-06, "loss": 0.3605, "step": 4178 }, { "epoch": 0.4067153284671533, "grad_norm": 1.5344015892195602, "learning_rate": 6.7184908920694115e-06, "loss": 0.4307, "step": 4179 }, { "epoch": 0.4068126520681265, "grad_norm": 1.396113291701313, "learning_rate": 6.717010670568648e-06, "loss": 0.444, "step": 4180 }, { "epoch": 0.40690997566909975, "grad_norm": 1.4766415367973333, "learning_rate": 6.715530278447479e-06, "loss": 0.3435, "step": 4181 }, { "epoch": 0.407007299270073, "grad_norm": 1.750623183326092, "learning_rate": 6.714049715853012e-06, "loss": 0.3844, "step": 4182 }, { "epoch": 0.4071046228710462, "grad_norm": 1.4982933826375, "learning_rate": 6.712568982932372e-06, "loss": 0.3814, "step": 4183 }, { "epoch": 0.40720194647201946, "grad_norm": 1.3051462794493567, "learning_rate": 6.711088079832697e-06, "loss": 0.346, "step": 4184 }, { "epoch": 0.4072992700729927, "grad_norm": 1.6547203197592473, "learning_rate": 6.709607006701149e-06, "loss": 0.5131, "step": 4185 }, { "epoch": 0.4073965936739659, "grad_norm": 1.5208934963641323, "learning_rate": 6.708125763684903e-06, "loss": 0.3869, "step": 4186 }, { "epoch": 0.40749391727493917, "grad_norm": 1.3749250911768454, "learning_rate": 6.706644350931149e-06, "loss": 0.3839, "step": 4187 }, { "epoch": 0.4075912408759124, "grad_norm": 1.0996995689031177, "learning_rate": 6.7051627685870966e-06, "loss": 0.2873, "step": 4188 }, { "epoch": 0.4076885644768856, "grad_norm": 1.4604592586007714, "learning_rate": 6.703681016799972e-06, "loss": 0.4912, "step": 4189 }, { "epoch": 0.40778588807785887, "grad_norm": 1.3866335130741587, "learning_rate": 6.702199095717018e-06, "loss": 0.5057, "step": 4190 }, { "epoch": 0.4078832116788321, "grad_norm": 1.4173847603754959, "learning_rate": 6.700717005485493e-06, "loss": 0.4536, "step": 4191 }, { "epoch": 0.4079805352798054, "grad_norm": 1.5420047114457367, "learning_rate": 6.699234746252676e-06, "loss": 0.616, "step": 4192 }, { "epoch": 0.4080778588807786, "grad_norm": 1.3569785732132214, "learning_rate": 6.697752318165855e-06, "loss": 0.3401, "step": 4193 }, { "epoch": 0.40817518248175183, "grad_norm": 1.1188023240624116, "learning_rate": 6.696269721372344e-06, "loss": 0.329, "step": 4194 }, { "epoch": 0.4082725060827251, "grad_norm": 1.3197371002133358, "learning_rate": 6.694786956019468e-06, "loss": 0.4845, "step": 4195 }, { "epoch": 0.4083698296836983, "grad_norm": 1.2069881394474367, "learning_rate": 6.69330402225457e-06, "loss": 0.3103, "step": 4196 }, { "epoch": 0.40846715328467154, "grad_norm": 1.272617146059266, "learning_rate": 6.691820920225011e-06, "loss": 0.3242, "step": 4197 }, { "epoch": 0.4085644768856448, "grad_norm": 1.2118257279828732, "learning_rate": 6.690337650078167e-06, "loss": 0.4317, "step": 4198 }, { "epoch": 0.408661800486618, "grad_norm": 1.4997553625886875, "learning_rate": 6.688854211961432e-06, "loss": 0.5907, "step": 4199 }, { "epoch": 0.40875912408759124, "grad_norm": 1.4858683252127711, "learning_rate": 6.687370606022214e-06, "loss": 0.5485, "step": 4200 }, { "epoch": 0.4088564476885645, "grad_norm": 1.2447962687685288, "learning_rate": 6.685886832407945e-06, "loss": 0.3447, "step": 4201 }, { "epoch": 0.4089537712895377, "grad_norm": 1.535995603376295, "learning_rate": 6.684402891266063e-06, "loss": 0.4063, "step": 4202 }, { "epoch": 0.40905109489051095, "grad_norm": 1.5783775807307692, "learning_rate": 6.682918782744033e-06, "loss": 0.6425, "step": 4203 }, { "epoch": 0.4091484184914842, "grad_norm": 1.4037569199049276, "learning_rate": 6.681434506989327e-06, "loss": 0.3833, "step": 4204 }, { "epoch": 0.4092457420924574, "grad_norm": 1.402178826370091, "learning_rate": 6.679950064149441e-06, "loss": 0.3551, "step": 4205 }, { "epoch": 0.40934306569343065, "grad_norm": 1.34468170512283, "learning_rate": 6.678465454371883e-06, "loss": 0.2863, "step": 4206 }, { "epoch": 0.4094403892944039, "grad_norm": 1.482507916794428, "learning_rate": 6.676980677804182e-06, "loss": 0.5207, "step": 4207 }, { "epoch": 0.4095377128953771, "grad_norm": 1.2804988426968886, "learning_rate": 6.675495734593882e-06, "loss": 0.3714, "step": 4208 }, { "epoch": 0.40963503649635036, "grad_norm": 1.8393375161927517, "learning_rate": 6.67401062488854e-06, "loss": 0.3356, "step": 4209 }, { "epoch": 0.4097323600973236, "grad_norm": 1.9062297382215951, "learning_rate": 6.672525348835734e-06, "loss": 0.4014, "step": 4210 }, { "epoch": 0.4098296836982968, "grad_norm": 1.5134950596073753, "learning_rate": 6.671039906583053e-06, "loss": 0.5023, "step": 4211 }, { "epoch": 0.40992700729927006, "grad_norm": 1.7782164726126763, "learning_rate": 6.669554298278113e-06, "loss": 0.464, "step": 4212 }, { "epoch": 0.4100243309002433, "grad_norm": 1.682379122922247, "learning_rate": 6.668068524068534e-06, "loss": 0.4665, "step": 4213 }, { "epoch": 0.41012165450121657, "grad_norm": 1.3325989707563795, "learning_rate": 6.666582584101962e-06, "loss": 0.465, "step": 4214 }, { "epoch": 0.41021897810218977, "grad_norm": 1.2220874474854295, "learning_rate": 6.665096478526054e-06, "loss": 0.2988, "step": 4215 }, { "epoch": 0.410316301703163, "grad_norm": 1.6164518148011633, "learning_rate": 6.663610207488483e-06, "loss": 0.5091, "step": 4216 }, { "epoch": 0.4104136253041363, "grad_norm": 1.565691092613228, "learning_rate": 6.662123771136946e-06, "loss": 0.4084, "step": 4217 }, { "epoch": 0.4105109489051095, "grad_norm": 1.5986808345528514, "learning_rate": 6.660637169619147e-06, "loss": 0.2829, "step": 4218 }, { "epoch": 0.4106082725060827, "grad_norm": 1.8220294500874799, "learning_rate": 6.659150403082812e-06, "loss": 0.5608, "step": 4219 }, { "epoch": 0.410705596107056, "grad_norm": 1.4769916213617307, "learning_rate": 6.6576634716756815e-06, "loss": 0.3977, "step": 4220 }, { "epoch": 0.4108029197080292, "grad_norm": 1.4616686781315174, "learning_rate": 6.656176375545513e-06, "loss": 0.2517, "step": 4221 }, { "epoch": 0.41090024330900243, "grad_norm": 1.1266940395897398, "learning_rate": 6.654689114840081e-06, "loss": 0.2244, "step": 4222 }, { "epoch": 0.4109975669099757, "grad_norm": 1.4101438231198031, "learning_rate": 6.653201689707174e-06, "loss": 0.4818, "step": 4223 }, { "epoch": 0.4110948905109489, "grad_norm": 1.296677172176164, "learning_rate": 6.6517141002946e-06, "loss": 0.3402, "step": 4224 }, { "epoch": 0.41119221411192214, "grad_norm": 1.3840057914039408, "learning_rate": 6.650226346750179e-06, "loss": 0.3776, "step": 4225 }, { "epoch": 0.4112895377128954, "grad_norm": 1.7375784492621011, "learning_rate": 6.6487384292217515e-06, "loss": 0.5091, "step": 4226 }, { "epoch": 0.4113868613138686, "grad_norm": 1.317135934057917, "learning_rate": 6.647250347857172e-06, "loss": 0.3428, "step": 4227 }, { "epoch": 0.41148418491484184, "grad_norm": 1.7823945099591982, "learning_rate": 6.645762102804316e-06, "loss": 0.5304, "step": 4228 }, { "epoch": 0.4115815085158151, "grad_norm": 1.6357287779301097, "learning_rate": 6.644273694211067e-06, "loss": 0.3339, "step": 4229 }, { "epoch": 0.4116788321167883, "grad_norm": 1.2562672855839114, "learning_rate": 6.6427851222253304e-06, "loss": 0.3303, "step": 4230 }, { "epoch": 0.41177615571776155, "grad_norm": 1.5704338823918165, "learning_rate": 6.641296386995025e-06, "loss": 0.4193, "step": 4231 }, { "epoch": 0.4118734793187348, "grad_norm": 1.5326708993630676, "learning_rate": 6.639807488668091e-06, "loss": 0.414, "step": 4232 }, { "epoch": 0.41197080291970806, "grad_norm": 1.2172543567321743, "learning_rate": 6.638318427392478e-06, "loss": 0.1645, "step": 4233 }, { "epoch": 0.41206812652068125, "grad_norm": 1.5519364904111816, "learning_rate": 6.636829203316155e-06, "loss": 0.3821, "step": 4234 }, { "epoch": 0.4121654501216545, "grad_norm": 1.190874224833403, "learning_rate": 6.635339816587109e-06, "loss": 0.3556, "step": 4235 }, { "epoch": 0.41226277372262776, "grad_norm": 1.1599374686849095, "learning_rate": 6.63385026735334e-06, "loss": 0.3749, "step": 4236 }, { "epoch": 0.41236009732360096, "grad_norm": 1.7378771365759709, "learning_rate": 6.632360555762865e-06, "loss": 0.4853, "step": 4237 }, { "epoch": 0.4124574209245742, "grad_norm": 1.6896892051906882, "learning_rate": 6.6308706819637195e-06, "loss": 0.3565, "step": 4238 }, { "epoch": 0.41255474452554747, "grad_norm": 1.5371662211782342, "learning_rate": 6.629380646103951e-06, "loss": 0.5813, "step": 4239 }, { "epoch": 0.41265206812652067, "grad_norm": 1.1350198221366297, "learning_rate": 6.627890448331627e-06, "loss": 0.2058, "step": 4240 }, { "epoch": 0.4127493917274939, "grad_norm": 1.4439504442984183, "learning_rate": 6.626400088794829e-06, "loss": 0.5246, "step": 4241 }, { "epoch": 0.4128467153284672, "grad_norm": 1.497251694970502, "learning_rate": 6.624909567641653e-06, "loss": 0.3753, "step": 4242 }, { "epoch": 0.41294403892944037, "grad_norm": 2.013436245382801, "learning_rate": 6.623418885020214e-06, "loss": 0.3015, "step": 4243 }, { "epoch": 0.4130413625304136, "grad_norm": 1.4477910414978084, "learning_rate": 6.621928041078645e-06, "loss": 0.3851, "step": 4244 }, { "epoch": 0.4131386861313869, "grad_norm": 1.3762994174937473, "learning_rate": 6.620437035965088e-06, "loss": 0.3556, "step": 4245 }, { "epoch": 0.4132360097323601, "grad_norm": 1.267972676100091, "learning_rate": 6.618945869827708e-06, "loss": 0.3547, "step": 4246 }, { "epoch": 0.41333333333333333, "grad_norm": 1.428605801305056, "learning_rate": 6.617454542814681e-06, "loss": 0.352, "step": 4247 }, { "epoch": 0.4134306569343066, "grad_norm": 2.0199889027885565, "learning_rate": 6.615963055074202e-06, "loss": 0.3482, "step": 4248 }, { "epoch": 0.4135279805352798, "grad_norm": 1.585931486672713, "learning_rate": 6.614471406754479e-06, "loss": 0.4499, "step": 4249 }, { "epoch": 0.41362530413625304, "grad_norm": 1.6316825936189212, "learning_rate": 6.612979598003743e-06, "loss": 0.4293, "step": 4250 }, { "epoch": 0.4137226277372263, "grad_norm": 1.4668132105500566, "learning_rate": 6.611487628970232e-06, "loss": 0.3095, "step": 4251 }, { "epoch": 0.4138199513381995, "grad_norm": 1.679886846024144, "learning_rate": 6.609995499802204e-06, "loss": 0.3704, "step": 4252 }, { "epoch": 0.41391727493917274, "grad_norm": 1.558010365002375, "learning_rate": 6.608503210647934e-06, "loss": 0.456, "step": 4253 }, { "epoch": 0.414014598540146, "grad_norm": 1.5296306221570222, "learning_rate": 6.607010761655711e-06, "loss": 0.3853, "step": 4254 }, { "epoch": 0.41411192214111925, "grad_norm": 1.683084220519706, "learning_rate": 6.605518152973842e-06, "loss": 0.4359, "step": 4255 }, { "epoch": 0.41420924574209245, "grad_norm": 1.3129724378896062, "learning_rate": 6.604025384750646e-06, "loss": 0.4273, "step": 4256 }, { "epoch": 0.4143065693430657, "grad_norm": 1.4342940696208062, "learning_rate": 6.602532457134463e-06, "loss": 0.4389, "step": 4257 }, { "epoch": 0.41440389294403895, "grad_norm": 3.280891471530478, "learning_rate": 6.6010393702736444e-06, "loss": 0.2364, "step": 4258 }, { "epoch": 0.41450121654501215, "grad_norm": 1.4615220313171857, "learning_rate": 6.599546124316558e-06, "loss": 0.3685, "step": 4259 }, { "epoch": 0.4145985401459854, "grad_norm": 1.2574907922123244, "learning_rate": 6.598052719411592e-06, "loss": 0.3912, "step": 4260 }, { "epoch": 0.41469586374695866, "grad_norm": 1.5069809434087071, "learning_rate": 6.596559155707144e-06, "loss": 0.372, "step": 4261 }, { "epoch": 0.41479318734793186, "grad_norm": 1.4890510390522098, "learning_rate": 6.595065433351631e-06, "loss": 0.3621, "step": 4262 }, { "epoch": 0.4148905109489051, "grad_norm": 2.49061644970277, "learning_rate": 6.5935715524934865e-06, "loss": 0.3959, "step": 4263 }, { "epoch": 0.41498783454987836, "grad_norm": 1.3665559018226794, "learning_rate": 6.5920775132811565e-06, "loss": 0.3345, "step": 4264 }, { "epoch": 0.41508515815085156, "grad_norm": 1.7454356205611485, "learning_rate": 6.590583315863106e-06, "loss": 0.5648, "step": 4265 }, { "epoch": 0.4151824817518248, "grad_norm": 1.1846482014659763, "learning_rate": 6.589088960387814e-06, "loss": 0.2959, "step": 4266 }, { "epoch": 0.41527980535279807, "grad_norm": 1.403873385053726, "learning_rate": 6.5875944470037745e-06, "loss": 0.3663, "step": 4267 }, { "epoch": 0.41537712895377127, "grad_norm": 1.3346226339580947, "learning_rate": 6.5860997758595005e-06, "loss": 0.3677, "step": 4268 }, { "epoch": 0.4154744525547445, "grad_norm": 1.6581717803832259, "learning_rate": 6.584604947103515e-06, "loss": 0.4299, "step": 4269 }, { "epoch": 0.4155717761557178, "grad_norm": 1.6137507681397871, "learning_rate": 6.583109960884362e-06, "loss": 0.4912, "step": 4270 }, { "epoch": 0.415669099756691, "grad_norm": 1.542312984907508, "learning_rate": 6.5816148173506e-06, "loss": 0.638, "step": 4271 }, { "epoch": 0.4157664233576642, "grad_norm": 1.4644079755455965, "learning_rate": 6.5801195166508e-06, "loss": 0.2901, "step": 4272 }, { "epoch": 0.4158637469586375, "grad_norm": 2.0993461408388985, "learning_rate": 6.578624058933555e-06, "loss": 0.3534, "step": 4273 }, { "epoch": 0.4159610705596107, "grad_norm": 1.529766064380746, "learning_rate": 6.577128444347465e-06, "loss": 0.3926, "step": 4274 }, { "epoch": 0.41605839416058393, "grad_norm": 1.3503570473016178, "learning_rate": 6.575632673041153e-06, "loss": 0.3662, "step": 4275 }, { "epoch": 0.4161557177615572, "grad_norm": 1.7770233200809984, "learning_rate": 6.574136745163253e-06, "loss": 0.4091, "step": 4276 }, { "epoch": 0.41625304136253044, "grad_norm": 1.3168899598288053, "learning_rate": 6.5726406608624185e-06, "loss": 0.2383, "step": 4277 }, { "epoch": 0.41635036496350364, "grad_norm": 1.4856806584236355, "learning_rate": 6.571144420287314e-06, "loss": 0.4036, "step": 4278 }, { "epoch": 0.4164476885644769, "grad_norm": 1.4152039194265187, "learning_rate": 6.569648023586624e-06, "loss": 0.3096, "step": 4279 }, { "epoch": 0.41654501216545015, "grad_norm": 1.4017120468664837, "learning_rate": 6.568151470909042e-06, "loss": 0.4778, "step": 4280 }, { "epoch": 0.41664233576642334, "grad_norm": 1.1741744814184691, "learning_rate": 6.566654762403286e-06, "loss": 0.3247, "step": 4281 }, { "epoch": 0.4167396593673966, "grad_norm": 1.4044574757131045, "learning_rate": 6.5651578982180845e-06, "loss": 0.3485, "step": 4282 }, { "epoch": 0.41683698296836985, "grad_norm": 1.605059643525527, "learning_rate": 6.56366087850218e-06, "loss": 0.5178, "step": 4283 }, { "epoch": 0.41693430656934305, "grad_norm": 1.6798776918121874, "learning_rate": 6.562163703404333e-06, "loss": 0.4953, "step": 4284 }, { "epoch": 0.4170316301703163, "grad_norm": 1.3694256582426696, "learning_rate": 6.560666373073317e-06, "loss": 0.3987, "step": 4285 }, { "epoch": 0.41712895377128956, "grad_norm": 1.6897161554637383, "learning_rate": 6.559168887657926e-06, "loss": 0.5096, "step": 4286 }, { "epoch": 0.41722627737226275, "grad_norm": 1.3446734604223378, "learning_rate": 6.557671247306965e-06, "loss": 0.3865, "step": 4287 }, { "epoch": 0.417323600973236, "grad_norm": 1.1928604006824761, "learning_rate": 6.556173452169252e-06, "loss": 0.2174, "step": 4288 }, { "epoch": 0.41742092457420926, "grad_norm": 1.2595066957738728, "learning_rate": 6.554675502393629e-06, "loss": 0.2598, "step": 4289 }, { "epoch": 0.41751824817518246, "grad_norm": 1.3670007196632394, "learning_rate": 6.5531773981289436e-06, "loss": 0.3852, "step": 4290 }, { "epoch": 0.4176155717761557, "grad_norm": 1.2671207019965953, "learning_rate": 6.551679139524068e-06, "loss": 0.3286, "step": 4291 }, { "epoch": 0.41771289537712897, "grad_norm": 3.291438678298298, "learning_rate": 6.55018072672788e-06, "loss": 0.341, "step": 4292 }, { "epoch": 0.41781021897810217, "grad_norm": 1.1986094365133406, "learning_rate": 6.548682159889284e-06, "loss": 0.3322, "step": 4293 }, { "epoch": 0.4179075425790754, "grad_norm": 1.7308394678650263, "learning_rate": 6.547183439157187e-06, "loss": 0.6067, "step": 4294 }, { "epoch": 0.4180048661800487, "grad_norm": 1.3996236022686042, "learning_rate": 6.54568456468052e-06, "loss": 0.47, "step": 4295 }, { "epoch": 0.41810218978102187, "grad_norm": 1.33827157270381, "learning_rate": 6.54418553660823e-06, "loss": 0.3526, "step": 4296 }, { "epoch": 0.4181995133819951, "grad_norm": 1.0611044750927061, "learning_rate": 6.542686355089273e-06, "loss": 0.2076, "step": 4297 }, { "epoch": 0.4182968369829684, "grad_norm": 1.483729653865723, "learning_rate": 6.541187020272624e-06, "loss": 0.4021, "step": 4298 }, { "epoch": 0.41839416058394163, "grad_norm": 1.4550107799537069, "learning_rate": 6.539687532307275e-06, "loss": 0.5588, "step": 4299 }, { "epoch": 0.41849148418491483, "grad_norm": 1.20942672500593, "learning_rate": 6.538187891342228e-06, "loss": 0.3367, "step": 4300 }, { "epoch": 0.4185888077858881, "grad_norm": 1.3240648028254882, "learning_rate": 6.5366880975265055e-06, "loss": 0.341, "step": 4301 }, { "epoch": 0.41868613138686134, "grad_norm": 1.805264599497935, "learning_rate": 6.535188151009143e-06, "loss": 0.2554, "step": 4302 }, { "epoch": 0.41878345498783454, "grad_norm": 1.1279014154499223, "learning_rate": 6.53368805193919e-06, "loss": 0.1854, "step": 4303 }, { "epoch": 0.4188807785888078, "grad_norm": 1.5248162706105455, "learning_rate": 6.532187800465713e-06, "loss": 0.5458, "step": 4304 }, { "epoch": 0.41897810218978104, "grad_norm": 0.9949928333273576, "learning_rate": 6.5306873967377916e-06, "loss": 0.2022, "step": 4305 }, { "epoch": 0.41907542579075424, "grad_norm": 1.3149218240859954, "learning_rate": 6.5291868409045226e-06, "loss": 0.3624, "step": 4306 }, { "epoch": 0.4191727493917275, "grad_norm": 1.5395304167054258, "learning_rate": 6.5276861331150175e-06, "loss": 0.4642, "step": 4307 }, { "epoch": 0.41927007299270075, "grad_norm": 1.2063026080741097, "learning_rate": 6.526185273518402e-06, "loss": 0.2982, "step": 4308 }, { "epoch": 0.41936739659367395, "grad_norm": 1.4265007816263084, "learning_rate": 6.52468426226382e-06, "loss": 0.3693, "step": 4309 }, { "epoch": 0.4194647201946472, "grad_norm": 1.6530646156286823, "learning_rate": 6.523183099500423e-06, "loss": 0.3381, "step": 4310 }, { "epoch": 0.41956204379562045, "grad_norm": 1.222949008199894, "learning_rate": 6.521681785377386e-06, "loss": 0.2989, "step": 4311 }, { "epoch": 0.41965936739659365, "grad_norm": 1.4404214942828424, "learning_rate": 6.520180320043894e-06, "loss": 0.5608, "step": 4312 }, { "epoch": 0.4197566909975669, "grad_norm": 1.4377809332947757, "learning_rate": 6.51867870364915e-06, "loss": 0.3673, "step": 4313 }, { "epoch": 0.41985401459854016, "grad_norm": 1.0479588946915206, "learning_rate": 6.517176936342372e-06, "loss": 0.1978, "step": 4314 }, { "epoch": 0.41995133819951336, "grad_norm": 1.2313516991145266, "learning_rate": 6.515675018272787e-06, "loss": 0.2914, "step": 4315 }, { "epoch": 0.4200486618004866, "grad_norm": 1.1594822014038484, "learning_rate": 6.514172949589644e-06, "loss": 0.3001, "step": 4316 }, { "epoch": 0.42014598540145986, "grad_norm": 1.2029717087625669, "learning_rate": 6.5126707304422035e-06, "loss": 0.3405, "step": 4317 }, { "epoch": 0.42024330900243306, "grad_norm": 1.16346382260932, "learning_rate": 6.5111683609797435e-06, "loss": 0.2537, "step": 4318 }, { "epoch": 0.4203406326034063, "grad_norm": 1.4210857674247095, "learning_rate": 6.509665841351555e-06, "loss": 0.5547, "step": 4319 }, { "epoch": 0.42043795620437957, "grad_norm": 1.5171779266903973, "learning_rate": 6.508163171706944e-06, "loss": 0.5007, "step": 4320 }, { "epoch": 0.4205352798053528, "grad_norm": 1.3552902060116079, "learning_rate": 6.506660352195231e-06, "loss": 0.4022, "step": 4321 }, { "epoch": 0.420632603406326, "grad_norm": 1.145145186502107, "learning_rate": 6.505157382965752e-06, "loss": 0.3137, "step": 4322 }, { "epoch": 0.4207299270072993, "grad_norm": 1.5286464745316035, "learning_rate": 6.503654264167861e-06, "loss": 0.5978, "step": 4323 }, { "epoch": 0.42082725060827253, "grad_norm": 1.426417712983417, "learning_rate": 6.50215099595092e-06, "loss": 0.3787, "step": 4324 }, { "epoch": 0.4209245742092457, "grad_norm": 1.2648697000425113, "learning_rate": 6.500647578464312e-06, "loss": 0.3706, "step": 4325 }, { "epoch": 0.421021897810219, "grad_norm": 1.3051542258204123, "learning_rate": 6.499144011857431e-06, "loss": 0.3678, "step": 4326 }, { "epoch": 0.42111922141119223, "grad_norm": 1.3581541871587341, "learning_rate": 6.497640296279688e-06, "loss": 0.3831, "step": 4327 }, { "epoch": 0.42121654501216543, "grad_norm": 1.1964601480022, "learning_rate": 6.496136431880509e-06, "loss": 0.3243, "step": 4328 }, { "epoch": 0.4213138686131387, "grad_norm": 1.339441934284622, "learning_rate": 6.4946324188093325e-06, "loss": 0.3394, "step": 4329 }, { "epoch": 0.42141119221411194, "grad_norm": 1.302596375163602, "learning_rate": 6.493128257215614e-06, "loss": 0.3479, "step": 4330 }, { "epoch": 0.42150851581508514, "grad_norm": 1.3886290381325908, "learning_rate": 6.491623947248824e-06, "loss": 0.5067, "step": 4331 }, { "epoch": 0.4216058394160584, "grad_norm": 1.2676024824015228, "learning_rate": 6.490119489058444e-06, "loss": 0.3229, "step": 4332 }, { "epoch": 0.42170316301703165, "grad_norm": 1.2119833091115328, "learning_rate": 6.488614882793974e-06, "loss": 0.3683, "step": 4333 }, { "epoch": 0.42180048661800484, "grad_norm": 1.4227111370092373, "learning_rate": 6.48711012860493e-06, "loss": 0.3916, "step": 4334 }, { "epoch": 0.4218978102189781, "grad_norm": 1.1478638546259368, "learning_rate": 6.4856052266408375e-06, "loss": 0.271, "step": 4335 }, { "epoch": 0.42199513381995135, "grad_norm": 1.5635814738770923, "learning_rate": 6.484100177051242e-06, "loss": 0.4534, "step": 4336 }, { "epoch": 0.42209245742092455, "grad_norm": 1.3956104024626772, "learning_rate": 6.4825949799856966e-06, "loss": 0.4618, "step": 4337 }, { "epoch": 0.4221897810218978, "grad_norm": 1.4196881255329625, "learning_rate": 6.481089635593778e-06, "loss": 0.4528, "step": 4338 }, { "epoch": 0.42228710462287106, "grad_norm": 1.400784106020295, "learning_rate": 6.479584144025073e-06, "loss": 0.5102, "step": 4339 }, { "epoch": 0.42238442822384425, "grad_norm": 2.468078425362333, "learning_rate": 6.4780785054291816e-06, "loss": 0.5655, "step": 4340 }, { "epoch": 0.4224817518248175, "grad_norm": 1.6227598889110846, "learning_rate": 6.476572719955721e-06, "loss": 0.4855, "step": 4341 }, { "epoch": 0.42257907542579076, "grad_norm": 1.306678121396544, "learning_rate": 6.475066787754322e-06, "loss": 0.4072, "step": 4342 }, { "epoch": 0.422676399026764, "grad_norm": 1.37061329653536, "learning_rate": 6.473560708974628e-06, "loss": 0.4673, "step": 4343 }, { "epoch": 0.4227737226277372, "grad_norm": 1.3905380736196298, "learning_rate": 6.472054483766301e-06, "loss": 0.2885, "step": 4344 }, { "epoch": 0.42287104622871047, "grad_norm": 4.801162046596706, "learning_rate": 6.470548112279016e-06, "loss": 0.5519, "step": 4345 }, { "epoch": 0.4229683698296837, "grad_norm": 1.3633612579215535, "learning_rate": 6.46904159466246e-06, "loss": 0.4516, "step": 4346 }, { "epoch": 0.4230656934306569, "grad_norm": 1.4357055576360107, "learning_rate": 6.4675349310663406e-06, "loss": 0.4457, "step": 4347 }, { "epoch": 0.4231630170316302, "grad_norm": 1.6129882398627167, "learning_rate": 6.466028121640371e-06, "loss": 0.4231, "step": 4348 }, { "epoch": 0.4232603406326034, "grad_norm": 1.3445730128609834, "learning_rate": 6.464521166534285e-06, "loss": 0.235, "step": 4349 }, { "epoch": 0.4233576642335766, "grad_norm": 1.3167614556649498, "learning_rate": 6.4630140658978315e-06, "loss": 0.4414, "step": 4350 }, { "epoch": 0.4234549878345499, "grad_norm": 1.4125450802251278, "learning_rate": 6.461506819880772e-06, "loss": 0.4684, "step": 4351 }, { "epoch": 0.42355231143552313, "grad_norm": 1.4953055035817089, "learning_rate": 6.459999428632882e-06, "loss": 0.4405, "step": 4352 }, { "epoch": 0.42364963503649633, "grad_norm": 1.3815431102607814, "learning_rate": 6.458491892303948e-06, "loss": 0.3887, "step": 4353 }, { "epoch": 0.4237469586374696, "grad_norm": 1.4957571422897962, "learning_rate": 6.4569842110437795e-06, "loss": 0.5109, "step": 4354 }, { "epoch": 0.42384428223844284, "grad_norm": 1.4325727522185985, "learning_rate": 6.455476385002195e-06, "loss": 0.3701, "step": 4355 }, { "epoch": 0.42394160583941604, "grad_norm": 1.3057908743402888, "learning_rate": 6.453968414329029e-06, "loss": 0.3755, "step": 4356 }, { "epoch": 0.4240389294403893, "grad_norm": 1.1674257374901074, "learning_rate": 6.452460299174126e-06, "loss": 0.3337, "step": 4357 }, { "epoch": 0.42413625304136254, "grad_norm": 1.5147142328351313, "learning_rate": 6.450952039687352e-06, "loss": 0.2443, "step": 4358 }, { "epoch": 0.42423357664233574, "grad_norm": 1.5905612385678973, "learning_rate": 6.449443636018579e-06, "loss": 0.3695, "step": 4359 }, { "epoch": 0.424330900243309, "grad_norm": 1.3783491268121844, "learning_rate": 6.447935088317704e-06, "loss": 0.4123, "step": 4360 }, { "epoch": 0.42442822384428225, "grad_norm": 1.6061178783894494, "learning_rate": 6.4464263967346286e-06, "loss": 0.5431, "step": 4361 }, { "epoch": 0.42452554744525545, "grad_norm": 1.4425269424857767, "learning_rate": 6.444917561419272e-06, "loss": 0.4448, "step": 4362 }, { "epoch": 0.4246228710462287, "grad_norm": 1.6012552106118871, "learning_rate": 6.443408582521571e-06, "loss": 0.4083, "step": 4363 }, { "epoch": 0.42472019464720195, "grad_norm": 1.0459143942818723, "learning_rate": 6.4418994601914695e-06, "loss": 0.1693, "step": 4364 }, { "epoch": 0.4248175182481752, "grad_norm": 1.404870861675631, "learning_rate": 6.4403901945789335e-06, "loss": 0.2467, "step": 4365 }, { "epoch": 0.4249148418491484, "grad_norm": 1.6503783456921153, "learning_rate": 6.438880785833938e-06, "loss": 0.6503, "step": 4366 }, { "epoch": 0.42501216545012166, "grad_norm": 1.4886065750794846, "learning_rate": 6.437371234106476e-06, "loss": 0.3345, "step": 4367 }, { "epoch": 0.4251094890510949, "grad_norm": 1.5814934463968207, "learning_rate": 6.4358615395465506e-06, "loss": 0.5983, "step": 4368 }, { "epoch": 0.4252068126520681, "grad_norm": 1.5092673127042504, "learning_rate": 6.43435170230418e-06, "loss": 0.3829, "step": 4369 }, { "epoch": 0.42530413625304136, "grad_norm": 1.3432402420129697, "learning_rate": 6.4328417225294015e-06, "loss": 0.4439, "step": 4370 }, { "epoch": 0.4254014598540146, "grad_norm": 1.623766357480442, "learning_rate": 6.431331600372259e-06, "loss": 0.4843, "step": 4371 }, { "epoch": 0.4254987834549878, "grad_norm": 1.7930756580668195, "learning_rate": 6.4298213359828155e-06, "loss": 0.3407, "step": 4372 }, { "epoch": 0.42559610705596107, "grad_norm": 1.2559785919794741, "learning_rate": 6.428310929511146e-06, "loss": 0.2498, "step": 4373 }, { "epoch": 0.4256934306569343, "grad_norm": 1.5454160958367298, "learning_rate": 6.426800381107343e-06, "loss": 0.4964, "step": 4374 }, { "epoch": 0.4257907542579075, "grad_norm": 1.2723938579546594, "learning_rate": 6.425289690921509e-06, "loss": 0.2513, "step": 4375 }, { "epoch": 0.4258880778588808, "grad_norm": 1.277636479890389, "learning_rate": 6.423778859103762e-06, "loss": 0.2989, "step": 4376 }, { "epoch": 0.42598540145985403, "grad_norm": 1.4447771060194305, "learning_rate": 6.4222678858042355e-06, "loss": 0.5214, "step": 4377 }, { "epoch": 0.4260827250608272, "grad_norm": 1.870842966622725, "learning_rate": 6.420756771173075e-06, "loss": 0.284, "step": 4378 }, { "epoch": 0.4261800486618005, "grad_norm": 1.941139248883791, "learning_rate": 6.419245515360441e-06, "loss": 0.4939, "step": 4379 }, { "epoch": 0.42627737226277373, "grad_norm": 1.4323796008311047, "learning_rate": 6.4177341185165045e-06, "loss": 0.4712, "step": 4380 }, { "epoch": 0.42637469586374693, "grad_norm": 1.495438155711353, "learning_rate": 6.41622258079146e-06, "loss": 0.3948, "step": 4381 }, { "epoch": 0.4264720194647202, "grad_norm": 1.275165719126958, "learning_rate": 6.414710902335507e-06, "loss": 0.3273, "step": 4382 }, { "epoch": 0.42656934306569344, "grad_norm": 1.1470699861754334, "learning_rate": 6.413199083298862e-06, "loss": 0.2802, "step": 4383 }, { "epoch": 0.4266666666666667, "grad_norm": 1.609847804866654, "learning_rate": 6.411687123831756e-06, "loss": 0.4448, "step": 4384 }, { "epoch": 0.4267639902676399, "grad_norm": 1.1993407147045914, "learning_rate": 6.4101750240844315e-06, "loss": 0.2549, "step": 4385 }, { "epoch": 0.42686131386861315, "grad_norm": 1.2585478345095418, "learning_rate": 6.408662784207149e-06, "loss": 0.4034, "step": 4386 }, { "epoch": 0.4269586374695864, "grad_norm": 1.561278514250439, "learning_rate": 6.40715040435018e-06, "loss": 0.325, "step": 4387 }, { "epoch": 0.4270559610705596, "grad_norm": 1.328127199058956, "learning_rate": 6.40563788466381e-06, "loss": 0.4286, "step": 4388 }, { "epoch": 0.42715328467153285, "grad_norm": 1.3392689531276551, "learning_rate": 6.40412522529834e-06, "loss": 0.2742, "step": 4389 }, { "epoch": 0.4272506082725061, "grad_norm": 1.5915708416492296, "learning_rate": 6.4026124264040824e-06, "loss": 0.4089, "step": 4390 }, { "epoch": 0.4273479318734793, "grad_norm": 1.5011107782482271, "learning_rate": 6.401099488131366e-06, "loss": 0.4322, "step": 4391 }, { "epoch": 0.42744525547445256, "grad_norm": 1.5212139628772348, "learning_rate": 6.399586410630533e-06, "loss": 0.4463, "step": 4392 }, { "epoch": 0.4275425790754258, "grad_norm": 1.1681823535144769, "learning_rate": 6.398073194051937e-06, "loss": 0.2473, "step": 4393 }, { "epoch": 0.427639902676399, "grad_norm": 1.587136703131999, "learning_rate": 6.396559838545949e-06, "loss": 0.3338, "step": 4394 }, { "epoch": 0.42773722627737226, "grad_norm": 1.493821772044209, "learning_rate": 6.395046344262951e-06, "loss": 0.4352, "step": 4395 }, { "epoch": 0.4278345498783455, "grad_norm": 1.3434445677218851, "learning_rate": 6.393532711353341e-06, "loss": 0.3475, "step": 4396 }, { "epoch": 0.4279318734793187, "grad_norm": 1.135742712307757, "learning_rate": 6.3920189399675295e-06, "loss": 0.2778, "step": 4397 }, { "epoch": 0.42802919708029197, "grad_norm": 1.582300911985823, "learning_rate": 6.390505030255939e-06, "loss": 0.4054, "step": 4398 }, { "epoch": 0.4281265206812652, "grad_norm": 1.4579512322620043, "learning_rate": 6.38899098236901e-06, "loss": 0.355, "step": 4399 }, { "epoch": 0.4282238442822384, "grad_norm": 1.3073200004921628, "learning_rate": 6.387476796457192e-06, "loss": 0.4277, "step": 4400 }, { "epoch": 0.4283211678832117, "grad_norm": 1.52175732418107, "learning_rate": 6.385962472670953e-06, "loss": 0.411, "step": 4401 }, { "epoch": 0.4284184914841849, "grad_norm": 1.9451581901413757, "learning_rate": 6.384448011160771e-06, "loss": 0.3991, "step": 4402 }, { "epoch": 0.4285158150851581, "grad_norm": 1.4320770472785467, "learning_rate": 6.38293341207714e-06, "loss": 0.5328, "step": 4403 }, { "epoch": 0.4286131386861314, "grad_norm": 1.4130794838716443, "learning_rate": 6.3814186755705645e-06, "loss": 0.3512, "step": 4404 }, { "epoch": 0.42871046228710463, "grad_norm": 1.3562391219164305, "learning_rate": 6.379903801791567e-06, "loss": 0.428, "step": 4405 }, { "epoch": 0.4288077858880779, "grad_norm": 1.2879718212279418, "learning_rate": 6.3783887908906805e-06, "loss": 0.369, "step": 4406 }, { "epoch": 0.4289051094890511, "grad_norm": 1.775559017643254, "learning_rate": 6.376873643018452e-06, "loss": 0.5678, "step": 4407 }, { "epoch": 0.42900243309002434, "grad_norm": 1.6372660811981936, "learning_rate": 6.375358358325444e-06, "loss": 0.6129, "step": 4408 }, { "epoch": 0.4290997566909976, "grad_norm": 1.565161231635583, "learning_rate": 6.37384293696223e-06, "loss": 0.5301, "step": 4409 }, { "epoch": 0.4291970802919708, "grad_norm": 1.3083780139331866, "learning_rate": 6.3723273790793995e-06, "loss": 0.3379, "step": 4410 }, { "epoch": 0.42929440389294404, "grad_norm": 1.7242515598869155, "learning_rate": 6.370811684827553e-06, "loss": 0.436, "step": 4411 }, { "epoch": 0.4293917274939173, "grad_norm": 1.2145655273563092, "learning_rate": 6.369295854357307e-06, "loss": 0.2734, "step": 4412 }, { "epoch": 0.4294890510948905, "grad_norm": 1.4312853613070817, "learning_rate": 6.36777988781929e-06, "loss": 0.2536, "step": 4413 }, { "epoch": 0.42958637469586375, "grad_norm": 1.3921604894550261, "learning_rate": 6.366263785364146e-06, "loss": 0.4233, "step": 4414 }, { "epoch": 0.429683698296837, "grad_norm": 1.550203845248266, "learning_rate": 6.36474754714253e-06, "loss": 0.4369, "step": 4415 }, { "epoch": 0.4297810218978102, "grad_norm": 1.5134552831430093, "learning_rate": 6.363231173305111e-06, "loss": 0.2749, "step": 4416 }, { "epoch": 0.42987834549878345, "grad_norm": 1.4415675127333825, "learning_rate": 6.361714664002572e-06, "loss": 0.2797, "step": 4417 }, { "epoch": 0.4299756690997567, "grad_norm": 1.3612060930020984, "learning_rate": 6.360198019385609e-06, "loss": 0.3666, "step": 4418 }, { "epoch": 0.4300729927007299, "grad_norm": 1.043670304048288, "learning_rate": 6.358681239604935e-06, "loss": 0.2156, "step": 4419 }, { "epoch": 0.43017031630170316, "grad_norm": 1.21375846836267, "learning_rate": 6.357164324811269e-06, "loss": 0.3139, "step": 4420 }, { "epoch": 0.4302676399026764, "grad_norm": 1.300403987028639, "learning_rate": 6.355647275155351e-06, "loss": 0.354, "step": 4421 }, { "epoch": 0.4303649635036496, "grad_norm": 1.5100041407092586, "learning_rate": 6.354130090787929e-06, "loss": 0.4109, "step": 4422 }, { "epoch": 0.43046228710462286, "grad_norm": 1.3727074497945866, "learning_rate": 6.352612771859769e-06, "loss": 0.4171, "step": 4423 }, { "epoch": 0.4305596107055961, "grad_norm": 1.3703693530338585, "learning_rate": 6.351095318521646e-06, "loss": 0.4578, "step": 4424 }, { "epoch": 0.4306569343065693, "grad_norm": 1.7249543999435943, "learning_rate": 6.34957773092435e-06, "loss": 0.4751, "step": 4425 }, { "epoch": 0.43075425790754257, "grad_norm": 1.540658191377533, "learning_rate": 6.3480600092186865e-06, "loss": 0.531, "step": 4426 }, { "epoch": 0.4308515815085158, "grad_norm": 1.4931397919463056, "learning_rate": 6.346542153555471e-06, "loss": 0.2869, "step": 4427 }, { "epoch": 0.4309489051094891, "grad_norm": 1.2386309493951129, "learning_rate": 6.345024164085533e-06, "loss": 0.3842, "step": 4428 }, { "epoch": 0.4310462287104623, "grad_norm": 1.8030870615440533, "learning_rate": 6.343506040959717e-06, "loss": 0.4934, "step": 4429 }, { "epoch": 0.43114355231143553, "grad_norm": 1.297247362108796, "learning_rate": 6.341987784328881e-06, "loss": 0.3116, "step": 4430 }, { "epoch": 0.4312408759124088, "grad_norm": 1.7662444251877587, "learning_rate": 6.340469394343895e-06, "loss": 0.4347, "step": 4431 }, { "epoch": 0.431338199513382, "grad_norm": 1.377574504492093, "learning_rate": 6.338950871155641e-06, "loss": 0.3021, "step": 4432 }, { "epoch": 0.43143552311435523, "grad_norm": 1.2448102026587469, "learning_rate": 6.337432214915014e-06, "loss": 0.3662, "step": 4433 }, { "epoch": 0.4315328467153285, "grad_norm": 1.609409665225116, "learning_rate": 6.335913425772926e-06, "loss": 0.5899, "step": 4434 }, { "epoch": 0.4316301703163017, "grad_norm": 1.5454437107583565, "learning_rate": 6.334394503880301e-06, "loss": 0.4264, "step": 4435 }, { "epoch": 0.43172749391727494, "grad_norm": 1.454423385884074, "learning_rate": 6.332875449388074e-06, "loss": 0.3231, "step": 4436 }, { "epoch": 0.4318248175182482, "grad_norm": 1.4621439149788715, "learning_rate": 6.3313562624471944e-06, "loss": 0.4866, "step": 4437 }, { "epoch": 0.4319221411192214, "grad_norm": 1.4447299092393133, "learning_rate": 6.329836943208624e-06, "loss": 0.3377, "step": 4438 }, { "epoch": 0.43201946472019465, "grad_norm": 1.5423068787846852, "learning_rate": 6.328317491823338e-06, "loss": 0.4526, "step": 4439 }, { "epoch": 0.4321167883211679, "grad_norm": 1.4786689709013954, "learning_rate": 6.326797908442328e-06, "loss": 0.6604, "step": 4440 }, { "epoch": 0.4322141119221411, "grad_norm": 1.3727693236517078, "learning_rate": 6.325278193216595e-06, "loss": 0.3873, "step": 4441 }, { "epoch": 0.43231143552311435, "grad_norm": 1.3990806933206996, "learning_rate": 6.323758346297153e-06, "loss": 0.4849, "step": 4442 }, { "epoch": 0.4324087591240876, "grad_norm": 1.768379628255284, "learning_rate": 6.32223836783503e-06, "loss": 0.4031, "step": 4443 }, { "epoch": 0.4325060827250608, "grad_norm": 1.5660766863699724, "learning_rate": 6.3207182579812664e-06, "loss": 0.6173, "step": 4444 }, { "epoch": 0.43260340632603406, "grad_norm": 1.7078680785018712, "learning_rate": 6.319198016886918e-06, "loss": 0.4452, "step": 4445 }, { "epoch": 0.4327007299270073, "grad_norm": 1.4433642478932882, "learning_rate": 6.317677644703054e-06, "loss": 0.5051, "step": 4446 }, { "epoch": 0.4327980535279805, "grad_norm": 1.255438703024322, "learning_rate": 6.316157141580751e-06, "loss": 0.2952, "step": 4447 }, { "epoch": 0.43289537712895376, "grad_norm": 1.499625079665707, "learning_rate": 6.314636507671105e-06, "loss": 0.4835, "step": 4448 }, { "epoch": 0.432992700729927, "grad_norm": 1.339457286562369, "learning_rate": 6.313115743125219e-06, "loss": 0.3763, "step": 4449 }, { "epoch": 0.43309002433090027, "grad_norm": 1.5988790221120088, "learning_rate": 6.311594848094216e-06, "loss": 0.2813, "step": 4450 }, { "epoch": 0.43318734793187347, "grad_norm": 1.431445598979624, "learning_rate": 6.310073822729228e-06, "loss": 0.5356, "step": 4451 }, { "epoch": 0.4332846715328467, "grad_norm": 1.687399967195683, "learning_rate": 6.308552667181397e-06, "loss": 0.7034, "step": 4452 }, { "epoch": 0.43338199513382, "grad_norm": 2.2202476497363235, "learning_rate": 6.307031381601885e-06, "loss": 0.3946, "step": 4453 }, { "epoch": 0.4334793187347932, "grad_norm": 1.527204424266891, "learning_rate": 6.3055099661418585e-06, "loss": 0.3661, "step": 4454 }, { "epoch": 0.4335766423357664, "grad_norm": 1.7819298233274148, "learning_rate": 6.303988420952506e-06, "loss": 0.612, "step": 4455 }, { "epoch": 0.4336739659367397, "grad_norm": 1.6866898181266232, "learning_rate": 6.302466746185022e-06, "loss": 0.5496, "step": 4456 }, { "epoch": 0.4337712895377129, "grad_norm": 1.5237861429986579, "learning_rate": 6.300944941990617e-06, "loss": 0.3251, "step": 4457 }, { "epoch": 0.43386861313868613, "grad_norm": 1.359651910048231, "learning_rate": 6.299423008520514e-06, "loss": 0.3467, "step": 4458 }, { "epoch": 0.4339659367396594, "grad_norm": 1.6804030202924267, "learning_rate": 6.2979009459259474e-06, "loss": 0.6095, "step": 4459 }, { "epoch": 0.4340632603406326, "grad_norm": 1.584995309359472, "learning_rate": 6.296378754358166e-06, "loss": 0.4095, "step": 4460 }, { "epoch": 0.43416058394160584, "grad_norm": 1.477199033753084, "learning_rate": 6.294856433968432e-06, "loss": 0.3005, "step": 4461 }, { "epoch": 0.4342579075425791, "grad_norm": 1.0669301740685238, "learning_rate": 6.293333984908018e-06, "loss": 0.1931, "step": 4462 }, { "epoch": 0.4343552311435523, "grad_norm": 1.7266158013725337, "learning_rate": 6.29181140732821e-06, "loss": 0.5056, "step": 4463 }, { "epoch": 0.43445255474452554, "grad_norm": 1.4202215620919534, "learning_rate": 6.2902887013803095e-06, "loss": 0.365, "step": 4464 }, { "epoch": 0.4345498783454988, "grad_norm": 1.4021022663094602, "learning_rate": 6.2887658672156256e-06, "loss": 0.3146, "step": 4465 }, { "epoch": 0.434647201946472, "grad_norm": 1.353687314020154, "learning_rate": 6.287242904985488e-06, "loss": 0.4899, "step": 4466 }, { "epoch": 0.43474452554744525, "grad_norm": 1.313498525896759, "learning_rate": 6.28571981484123e-06, "loss": 0.3394, "step": 4467 }, { "epoch": 0.4348418491484185, "grad_norm": 1.6439873527634399, "learning_rate": 6.284196596934205e-06, "loss": 0.5208, "step": 4468 }, { "epoch": 0.4349391727493917, "grad_norm": 1.6003841624409958, "learning_rate": 6.282673251415774e-06, "loss": 0.6754, "step": 4469 }, { "epoch": 0.43503649635036495, "grad_norm": 1.853737692197248, "learning_rate": 6.281149778437314e-06, "loss": 0.907, "step": 4470 }, { "epoch": 0.4351338199513382, "grad_norm": 1.4836354873033306, "learning_rate": 6.2796261781502135e-06, "loss": 0.4864, "step": 4471 }, { "epoch": 0.43523114355231146, "grad_norm": 1.4138693493182206, "learning_rate": 6.278102450705872e-06, "loss": 0.3788, "step": 4472 }, { "epoch": 0.43532846715328466, "grad_norm": 1.5811350603509635, "learning_rate": 6.276578596255705e-06, "loss": 0.6215, "step": 4473 }, { "epoch": 0.4354257907542579, "grad_norm": 1.1902771148669904, "learning_rate": 6.2750546149511386e-06, "loss": 0.1741, "step": 4474 }, { "epoch": 0.43552311435523117, "grad_norm": 1.3679298347414897, "learning_rate": 6.2735305069436104e-06, "loss": 0.4947, "step": 4475 }, { "epoch": 0.43562043795620436, "grad_norm": 1.1609853742337026, "learning_rate": 6.2720062723845734e-06, "loss": 0.4, "step": 4476 }, { "epoch": 0.4357177615571776, "grad_norm": 1.2996616987055336, "learning_rate": 6.270481911425491e-06, "loss": 0.485, "step": 4477 }, { "epoch": 0.43581508515815087, "grad_norm": 1.5048416201370751, "learning_rate": 6.268957424217841e-06, "loss": 0.5718, "step": 4478 }, { "epoch": 0.43591240875912407, "grad_norm": 1.3161535728034814, "learning_rate": 6.267432810913112e-06, "loss": 0.363, "step": 4479 }, { "epoch": 0.4360097323600973, "grad_norm": 1.4311736673714228, "learning_rate": 6.265908071662804e-06, "loss": 0.5634, "step": 4480 }, { "epoch": 0.4361070559610706, "grad_norm": 0.9733294260151267, "learning_rate": 6.264383206618434e-06, "loss": 0.2752, "step": 4481 }, { "epoch": 0.4362043795620438, "grad_norm": 1.1377354954511871, "learning_rate": 6.262858215931527e-06, "loss": 0.2207, "step": 4482 }, { "epoch": 0.43630170316301703, "grad_norm": 1.5576967799021977, "learning_rate": 6.261333099753623e-06, "loss": 0.6161, "step": 4483 }, { "epoch": 0.4363990267639903, "grad_norm": 1.5038150115637616, "learning_rate": 6.259807858236276e-06, "loss": 0.4355, "step": 4484 }, { "epoch": 0.4364963503649635, "grad_norm": 1.3037916751392746, "learning_rate": 6.258282491531044e-06, "loss": 0.4034, "step": 4485 }, { "epoch": 0.43659367396593673, "grad_norm": 1.421667146686385, "learning_rate": 6.256756999789509e-06, "loss": 0.3804, "step": 4486 }, { "epoch": 0.43669099756691, "grad_norm": 1.3553832629067233, "learning_rate": 6.255231383163257e-06, "loss": 0.3941, "step": 4487 }, { "epoch": 0.4367883211678832, "grad_norm": 1.4993022249408712, "learning_rate": 6.253705641803893e-06, "loss": 0.4304, "step": 4488 }, { "epoch": 0.43688564476885644, "grad_norm": 1.3146885381920743, "learning_rate": 6.25217977586303e-06, "loss": 0.3686, "step": 4489 }, { "epoch": 0.4369829683698297, "grad_norm": 1.404764110949849, "learning_rate": 6.25065378549229e-06, "loss": 0.3197, "step": 4490 }, { "epoch": 0.4370802919708029, "grad_norm": 1.4483393136893457, "learning_rate": 6.249127670843316e-06, "loss": 0.4756, "step": 4491 }, { "epoch": 0.43717761557177615, "grad_norm": 1.2899076523365525, "learning_rate": 6.247601432067757e-06, "loss": 0.3656, "step": 4492 }, { "epoch": 0.4372749391727494, "grad_norm": 1.135799127727455, "learning_rate": 6.246075069317278e-06, "loss": 0.3566, "step": 4493 }, { "epoch": 0.43737226277372265, "grad_norm": 1.1564573153655837, "learning_rate": 6.244548582743553e-06, "loss": 0.2099, "step": 4494 }, { "epoch": 0.43746958637469585, "grad_norm": 1.4408336628960754, "learning_rate": 6.2430219724982695e-06, "loss": 0.4232, "step": 4495 }, { "epoch": 0.4375669099756691, "grad_norm": 1.203964188436034, "learning_rate": 6.241495238733128e-06, "loss": 0.2844, "step": 4496 }, { "epoch": 0.43766423357664236, "grad_norm": 1.3859509558814715, "learning_rate": 6.239968381599843e-06, "loss": 0.5172, "step": 4497 }, { "epoch": 0.43776155717761556, "grad_norm": 1.3335422213163426, "learning_rate": 6.238441401250138e-06, "loss": 0.4624, "step": 4498 }, { "epoch": 0.4378588807785888, "grad_norm": 1.107326119940465, "learning_rate": 6.236914297835749e-06, "loss": 0.2921, "step": 4499 }, { "epoch": 0.43795620437956206, "grad_norm": 1.4409153213399095, "learning_rate": 6.235387071508427e-06, "loss": 0.4068, "step": 4500 }, { "epoch": 0.43805352798053526, "grad_norm": 1.505851854436676, "learning_rate": 6.233859722419932e-06, "loss": 0.5088, "step": 4501 }, { "epoch": 0.4381508515815085, "grad_norm": 1.5283188304954876, "learning_rate": 6.232332250722037e-06, "loss": 0.3688, "step": 4502 }, { "epoch": 0.43824817518248177, "grad_norm": 1.3093036149624997, "learning_rate": 6.230804656566528e-06, "loss": 0.4499, "step": 4503 }, { "epoch": 0.43834549878345497, "grad_norm": 1.504088868782464, "learning_rate": 6.229276940105207e-06, "loss": 0.437, "step": 4504 }, { "epoch": 0.4384428223844282, "grad_norm": 1.2562191745904712, "learning_rate": 6.227749101489878e-06, "loss": 0.4475, "step": 4505 }, { "epoch": 0.4385401459854015, "grad_norm": 1.6309788825739657, "learning_rate": 6.226221140872368e-06, "loss": 0.3089, "step": 4506 }, { "epoch": 0.4386374695863747, "grad_norm": 2.3528430785037466, "learning_rate": 6.224693058404508e-06, "loss": 0.4253, "step": 4507 }, { "epoch": 0.4387347931873479, "grad_norm": 1.286677193911547, "learning_rate": 6.2231648542381465e-06, "loss": 0.3963, "step": 4508 }, { "epoch": 0.4388321167883212, "grad_norm": 1.5139355239140297, "learning_rate": 6.221636528525142e-06, "loss": 0.4707, "step": 4509 }, { "epoch": 0.4389294403892944, "grad_norm": 1.3239916041185795, "learning_rate": 6.220108081417364e-06, "loss": 0.3053, "step": 4510 }, { "epoch": 0.43902676399026763, "grad_norm": 1.416636532107715, "learning_rate": 6.2185795130666985e-06, "loss": 0.5528, "step": 4511 }, { "epoch": 0.4391240875912409, "grad_norm": 1.1732661826974413, "learning_rate": 6.217050823625035e-06, "loss": 0.3784, "step": 4512 }, { "epoch": 0.43922141119221414, "grad_norm": 1.5433129035443396, "learning_rate": 6.215522013244284e-06, "loss": 0.3514, "step": 4513 }, { "epoch": 0.43931873479318734, "grad_norm": 1.4579446092871258, "learning_rate": 6.213993082076363e-06, "loss": 0.5273, "step": 4514 }, { "epoch": 0.4394160583941606, "grad_norm": 1.7944461495096447, "learning_rate": 6.212464030273204e-06, "loss": 0.2067, "step": 4515 }, { "epoch": 0.43951338199513384, "grad_norm": 1.3661123353334617, "learning_rate": 6.210934857986749e-06, "loss": 0.3677, "step": 4516 }, { "epoch": 0.43961070559610704, "grad_norm": 1.3915341009552726, "learning_rate": 6.209405565368952e-06, "loss": 0.3781, "step": 4517 }, { "epoch": 0.4397080291970803, "grad_norm": 1.4670010156712465, "learning_rate": 6.207876152571781e-06, "loss": 0.5255, "step": 4518 }, { "epoch": 0.43980535279805355, "grad_norm": 1.311301728892854, "learning_rate": 6.206346619747214e-06, "loss": 0.2378, "step": 4519 }, { "epoch": 0.43990267639902675, "grad_norm": 4.29827103564608, "learning_rate": 6.204816967047244e-06, "loss": 0.4591, "step": 4520 }, { "epoch": 0.44, "grad_norm": 1.3808741804783327, "learning_rate": 6.20328719462387e-06, "loss": 0.4271, "step": 4521 }, { "epoch": 0.44009732360097326, "grad_norm": 1.5466603602220501, "learning_rate": 6.2017573026291074e-06, "loss": 0.3395, "step": 4522 }, { "epoch": 0.44019464720194645, "grad_norm": 1.5704367669123898, "learning_rate": 6.2002272912149816e-06, "loss": 0.4498, "step": 4523 }, { "epoch": 0.4402919708029197, "grad_norm": 1.345795826148155, "learning_rate": 6.198697160533535e-06, "loss": 0.4448, "step": 4524 }, { "epoch": 0.44038929440389296, "grad_norm": 1.766947090937083, "learning_rate": 6.197166910736815e-06, "loss": 0.4943, "step": 4525 }, { "epoch": 0.44048661800486616, "grad_norm": 1.2348236041408933, "learning_rate": 6.195636541976881e-06, "loss": 0.3829, "step": 4526 }, { "epoch": 0.4405839416058394, "grad_norm": 1.216056390232819, "learning_rate": 6.194106054405811e-06, "loss": 0.3383, "step": 4527 }, { "epoch": 0.44068126520681267, "grad_norm": 1.0767140762079304, "learning_rate": 6.192575448175685e-06, "loss": 0.3027, "step": 4528 }, { "epoch": 0.44077858880778586, "grad_norm": 2.2278844288860453, "learning_rate": 6.1910447234386074e-06, "loss": 0.4248, "step": 4529 }, { "epoch": 0.4408759124087591, "grad_norm": 1.5645319124687282, "learning_rate": 6.189513880346681e-06, "loss": 0.4836, "step": 4530 }, { "epoch": 0.44097323600973237, "grad_norm": 1.4546530288234156, "learning_rate": 6.187982919052031e-06, "loss": 0.4532, "step": 4531 }, { "epoch": 0.44107055961070557, "grad_norm": 1.3533832876181235, "learning_rate": 6.1864518397067875e-06, "loss": 0.4175, "step": 4532 }, { "epoch": 0.4411678832116788, "grad_norm": 1.3880156134625945, "learning_rate": 6.184920642463095e-06, "loss": 0.3536, "step": 4533 }, { "epoch": 0.4412652068126521, "grad_norm": 1.6523868203467935, "learning_rate": 6.18338932747311e-06, "loss": 0.3165, "step": 4534 }, { "epoch": 0.44136253041362533, "grad_norm": 1.0224564487537273, "learning_rate": 6.181857894889001e-06, "loss": 0.231, "step": 4535 }, { "epoch": 0.44145985401459853, "grad_norm": 1.4535342224784864, "learning_rate": 6.180326344862947e-06, "loss": 0.4897, "step": 4536 }, { "epoch": 0.4415571776155718, "grad_norm": 1.178209958440824, "learning_rate": 6.178794677547138e-06, "loss": 0.249, "step": 4537 }, { "epoch": 0.44165450121654504, "grad_norm": 1.2598408521325328, "learning_rate": 6.177262893093776e-06, "loss": 0.226, "step": 4538 }, { "epoch": 0.44175182481751823, "grad_norm": 1.214630010023126, "learning_rate": 6.175730991655077e-06, "loss": 0.3077, "step": 4539 }, { "epoch": 0.4418491484184915, "grad_norm": 1.1811098530299018, "learning_rate": 6.174198973383268e-06, "loss": 0.2962, "step": 4540 }, { "epoch": 0.44194647201946474, "grad_norm": 1.37805845657898, "learning_rate": 6.1726668384305845e-06, "loss": 0.5316, "step": 4541 }, { "epoch": 0.44204379562043794, "grad_norm": 1.2328033098813826, "learning_rate": 6.171134586949277e-06, "loss": 0.3806, "step": 4542 }, { "epoch": 0.4421411192214112, "grad_norm": 1.4675287079480706, "learning_rate": 6.169602219091605e-06, "loss": 0.5604, "step": 4543 }, { "epoch": 0.44223844282238445, "grad_norm": 1.2852516486753356, "learning_rate": 6.168069735009842e-06, "loss": 0.3012, "step": 4544 }, { "epoch": 0.44233576642335765, "grad_norm": 1.348395899266854, "learning_rate": 6.166537134856272e-06, "loss": 0.4062, "step": 4545 }, { "epoch": 0.4424330900243309, "grad_norm": 1.3878599816806487, "learning_rate": 6.1650044187831895e-06, "loss": 0.3538, "step": 4546 }, { "epoch": 0.44253041362530415, "grad_norm": 1.4923058907243107, "learning_rate": 6.163471586942901e-06, "loss": 0.4973, "step": 4547 }, { "epoch": 0.44262773722627735, "grad_norm": 1.4316166437282047, "learning_rate": 6.161938639487728e-06, "loss": 0.3876, "step": 4548 }, { "epoch": 0.4427250608272506, "grad_norm": 1.5722407558203977, "learning_rate": 6.160405576569996e-06, "loss": 0.5089, "step": 4549 }, { "epoch": 0.44282238442822386, "grad_norm": 1.434215442035674, "learning_rate": 6.1588723983420485e-06, "loss": 0.4982, "step": 4550 }, { "epoch": 0.44291970802919706, "grad_norm": 1.4729024747014614, "learning_rate": 6.15733910495624e-06, "loss": 0.549, "step": 4551 }, { "epoch": 0.4430170316301703, "grad_norm": 1.1632891140397226, "learning_rate": 6.155805696564934e-06, "loss": 0.2645, "step": 4552 }, { "epoch": 0.44311435523114356, "grad_norm": 1.623128987609642, "learning_rate": 6.154272173320503e-06, "loss": 0.4606, "step": 4553 }, { "epoch": 0.44321167883211676, "grad_norm": 1.8114455872128037, "learning_rate": 6.152738535375337e-06, "loss": 0.524, "step": 4554 }, { "epoch": 0.44330900243309, "grad_norm": 1.5846271833353753, "learning_rate": 6.151204782881835e-06, "loss": 0.3161, "step": 4555 }, { "epoch": 0.44340632603406327, "grad_norm": 1.4826265980282791, "learning_rate": 6.149670915992407e-06, "loss": 0.5112, "step": 4556 }, { "epoch": 0.4435036496350365, "grad_norm": 1.2833430244443191, "learning_rate": 6.1481369348594725e-06, "loss": 0.2817, "step": 4557 }, { "epoch": 0.4436009732360097, "grad_norm": 1.757349114269509, "learning_rate": 6.146602839635466e-06, "loss": 0.4294, "step": 4558 }, { "epoch": 0.443698296836983, "grad_norm": 1.7036663909794458, "learning_rate": 6.145068630472829e-06, "loss": 0.3035, "step": 4559 }, { "epoch": 0.44379562043795623, "grad_norm": 2.71526437698097, "learning_rate": 6.143534307524019e-06, "loss": 0.236, "step": 4560 }, { "epoch": 0.4438929440389294, "grad_norm": 1.6314644975788934, "learning_rate": 6.141999870941503e-06, "loss": 0.4021, "step": 4561 }, { "epoch": 0.4439902676399027, "grad_norm": 3.25660621027306, "learning_rate": 6.140465320877757e-06, "loss": 0.5017, "step": 4562 }, { "epoch": 0.44408759124087593, "grad_norm": 2.3230459636290672, "learning_rate": 6.1389306574852715e-06, "loss": 0.336, "step": 4563 }, { "epoch": 0.44418491484184913, "grad_norm": 1.203787782101882, "learning_rate": 6.137395880916546e-06, "loss": 0.3911, "step": 4564 }, { "epoch": 0.4442822384428224, "grad_norm": 2.2630418455306573, "learning_rate": 6.135860991324092e-06, "loss": 0.4102, "step": 4565 }, { "epoch": 0.44437956204379564, "grad_norm": 1.4186240119281917, "learning_rate": 6.1343259888604335e-06, "loss": 0.2737, "step": 4566 }, { "epoch": 0.44447688564476884, "grad_norm": 1.5655785559429751, "learning_rate": 6.132790873678105e-06, "loss": 0.5299, "step": 4567 }, { "epoch": 0.4445742092457421, "grad_norm": 1.430534737115544, "learning_rate": 6.13125564592965e-06, "loss": 0.2333, "step": 4568 }, { "epoch": 0.44467153284671534, "grad_norm": 1.608300378030264, "learning_rate": 6.129720305767628e-06, "loss": 0.4874, "step": 4569 }, { "epoch": 0.44476885644768854, "grad_norm": 1.5687565472834768, "learning_rate": 6.128184853344604e-06, "loss": 0.3432, "step": 4570 }, { "epoch": 0.4448661800486618, "grad_norm": 1.5216378561818185, "learning_rate": 6.126649288813157e-06, "loss": 0.3871, "step": 4571 }, { "epoch": 0.44496350364963505, "grad_norm": 1.1804091917573971, "learning_rate": 6.125113612325879e-06, "loss": 0.235, "step": 4572 }, { "epoch": 0.44506082725060825, "grad_norm": 1.3663869203189398, "learning_rate": 6.123577824035368e-06, "loss": 0.4032, "step": 4573 }, { "epoch": 0.4451581508515815, "grad_norm": 1.7585831372802134, "learning_rate": 6.12204192409424e-06, "loss": 0.6367, "step": 4574 }, { "epoch": 0.44525547445255476, "grad_norm": 1.4053312903418944, "learning_rate": 6.120505912655115e-06, "loss": 0.3227, "step": 4575 }, { "epoch": 0.44535279805352795, "grad_norm": 1.2522162816981348, "learning_rate": 6.118969789870629e-06, "loss": 0.2994, "step": 4576 }, { "epoch": 0.4454501216545012, "grad_norm": 1.282504281897582, "learning_rate": 6.117433555893426e-06, "loss": 0.2873, "step": 4577 }, { "epoch": 0.44554744525547446, "grad_norm": 1.4863199441812478, "learning_rate": 6.115897210876166e-06, "loss": 0.3527, "step": 4578 }, { "epoch": 0.4456447688564477, "grad_norm": 1.20324746041235, "learning_rate": 6.114360754971515e-06, "loss": 0.3046, "step": 4579 }, { "epoch": 0.4457420924574209, "grad_norm": 1.4778238388920404, "learning_rate": 6.112824188332148e-06, "loss": 0.5038, "step": 4580 }, { "epoch": 0.44583941605839417, "grad_norm": 1.3118376195778845, "learning_rate": 6.111287511110758e-06, "loss": 0.423, "step": 4581 }, { "epoch": 0.4459367396593674, "grad_norm": 1.281042407826542, "learning_rate": 6.109750723460045e-06, "loss": 0.3795, "step": 4582 }, { "epoch": 0.4460340632603406, "grad_norm": 1.180836806111601, "learning_rate": 6.108213825532722e-06, "loss": 0.2544, "step": 4583 }, { "epoch": 0.44613138686131387, "grad_norm": 1.2594621159639785, "learning_rate": 6.106676817481508e-06, "loss": 0.2813, "step": 4584 }, { "epoch": 0.4462287104622871, "grad_norm": 1.4518226612130753, "learning_rate": 6.1051396994591405e-06, "loss": 0.4844, "step": 4585 }, { "epoch": 0.4463260340632603, "grad_norm": 1.4653587961723828, "learning_rate": 6.103602471618361e-06, "loss": 0.514, "step": 4586 }, { "epoch": 0.4464233576642336, "grad_norm": 1.5072282835369855, "learning_rate": 6.102065134111924e-06, "loss": 0.5215, "step": 4587 }, { "epoch": 0.44652068126520683, "grad_norm": 1.4484793120998232, "learning_rate": 6.100527687092599e-06, "loss": 0.4045, "step": 4588 }, { "epoch": 0.44661800486618003, "grad_norm": 1.763093466978771, "learning_rate": 6.09899013071316e-06, "loss": 0.4045, "step": 4589 }, { "epoch": 0.4467153284671533, "grad_norm": 1.4119795743556356, "learning_rate": 6.097452465126399e-06, "loss": 0.4171, "step": 4590 }, { "epoch": 0.44681265206812654, "grad_norm": 1.5294922062057048, "learning_rate": 6.095914690485109e-06, "loss": 0.5523, "step": 4591 }, { "epoch": 0.44690997566909973, "grad_norm": 1.481430345332682, "learning_rate": 6.0943768069421035e-06, "loss": 0.502, "step": 4592 }, { "epoch": 0.447007299270073, "grad_norm": 1.6136069350056235, "learning_rate": 6.092838814650202e-06, "loss": 0.5971, "step": 4593 }, { "epoch": 0.44710462287104624, "grad_norm": 1.3313623801781076, "learning_rate": 6.091300713762236e-06, "loss": 0.4233, "step": 4594 }, { "epoch": 0.44720194647201944, "grad_norm": 1.423435654204561, "learning_rate": 6.0897625044310475e-06, "loss": 0.4662, "step": 4595 }, { "epoch": 0.4472992700729927, "grad_norm": 1.3331887146880435, "learning_rate": 6.08822418680949e-06, "loss": 0.3056, "step": 4596 }, { "epoch": 0.44739659367396595, "grad_norm": 1.2900254785254246, "learning_rate": 6.086685761050423e-06, "loss": 0.252, "step": 4597 }, { "epoch": 0.44749391727493915, "grad_norm": 1.3447249668086938, "learning_rate": 6.085147227306727e-06, "loss": 0.3061, "step": 4598 }, { "epoch": 0.4475912408759124, "grad_norm": 1.12221277194141, "learning_rate": 6.083608585731283e-06, "loss": 0.3122, "step": 4599 }, { "epoch": 0.44768856447688565, "grad_norm": 1.4658809804338166, "learning_rate": 6.082069836476988e-06, "loss": 0.4721, "step": 4600 }, { "epoch": 0.4477858880778589, "grad_norm": 1.2247027737228893, "learning_rate": 6.0805309796967484e-06, "loss": 0.2408, "step": 4601 }, { "epoch": 0.4478832116788321, "grad_norm": 1.2025495862353188, "learning_rate": 6.07899201554348e-06, "loss": 0.4184, "step": 4602 }, { "epoch": 0.44798053527980536, "grad_norm": 1.3242619600251044, "learning_rate": 6.077452944170113e-06, "loss": 0.3371, "step": 4603 }, { "epoch": 0.4480778588807786, "grad_norm": 1.106734989558217, "learning_rate": 6.075913765729584e-06, "loss": 0.2301, "step": 4604 }, { "epoch": 0.4481751824817518, "grad_norm": 1.2729210303565284, "learning_rate": 6.074374480374844e-06, "loss": 0.2714, "step": 4605 }, { "epoch": 0.44827250608272506, "grad_norm": 15.212009323594724, "learning_rate": 6.072835088258851e-06, "loss": 0.38, "step": 4606 }, { "epoch": 0.4483698296836983, "grad_norm": 1.4758284424424868, "learning_rate": 6.071295589534576e-06, "loss": 0.5838, "step": 4607 }, { "epoch": 0.4484671532846715, "grad_norm": 1.3641656231434411, "learning_rate": 6.0697559843549994e-06, "loss": 0.3822, "step": 4608 }, { "epoch": 0.44856447688564477, "grad_norm": 1.12662425936695, "learning_rate": 6.068216272873112e-06, "loss": 0.2544, "step": 4609 }, { "epoch": 0.448661800486618, "grad_norm": 1.3773493636304128, "learning_rate": 6.066676455241919e-06, "loss": 0.2609, "step": 4610 }, { "epoch": 0.4487591240875912, "grad_norm": 1.3603847743332291, "learning_rate": 6.0651365316144295e-06, "loss": 0.3462, "step": 4611 }, { "epoch": 0.4488564476885645, "grad_norm": 1.9756668534351456, "learning_rate": 6.0635965021436696e-06, "loss": 0.3505, "step": 4612 }, { "epoch": 0.44895377128953773, "grad_norm": 1.7428842015365853, "learning_rate": 6.0620563669826695e-06, "loss": 0.5871, "step": 4613 }, { "epoch": 0.4490510948905109, "grad_norm": 1.427382899490253, "learning_rate": 6.060516126284477e-06, "loss": 0.4058, "step": 4614 }, { "epoch": 0.4491484184914842, "grad_norm": 1.6387860846020157, "learning_rate": 6.058975780202144e-06, "loss": 0.4983, "step": 4615 }, { "epoch": 0.44924574209245743, "grad_norm": 1.3894509921139317, "learning_rate": 6.057435328888739e-06, "loss": 0.3205, "step": 4616 }, { "epoch": 0.44934306569343063, "grad_norm": 1.582913828371373, "learning_rate": 6.0558947724973345e-06, "loss": 0.4199, "step": 4617 }, { "epoch": 0.4494403892944039, "grad_norm": 1.555270291844662, "learning_rate": 6.054354111181015e-06, "loss": 0.4911, "step": 4618 }, { "epoch": 0.44953771289537714, "grad_norm": 1.382143365205803, "learning_rate": 6.0528133450928826e-06, "loss": 0.2393, "step": 4619 }, { "epoch": 0.44963503649635034, "grad_norm": 1.720605299698648, "learning_rate": 6.051272474386039e-06, "loss": 0.3921, "step": 4620 }, { "epoch": 0.4497323600973236, "grad_norm": 1.6775663955669964, "learning_rate": 6.0497314992136055e-06, "loss": 0.5277, "step": 4621 }, { "epoch": 0.44982968369829684, "grad_norm": 1.2197851040232244, "learning_rate": 6.048190419728706e-06, "loss": 0.3849, "step": 4622 }, { "epoch": 0.4499270072992701, "grad_norm": 1.324451887375089, "learning_rate": 6.046649236084481e-06, "loss": 0.2846, "step": 4623 }, { "epoch": 0.4500243309002433, "grad_norm": 1.3942500239593356, "learning_rate": 6.045107948434077e-06, "loss": 0.2472, "step": 4624 }, { "epoch": 0.45012165450121655, "grad_norm": 1.2311046107859251, "learning_rate": 6.043566556930656e-06, "loss": 0.3205, "step": 4625 }, { "epoch": 0.4502189781021898, "grad_norm": 1.251194954956108, "learning_rate": 6.042025061727384e-06, "loss": 0.3929, "step": 4626 }, { "epoch": 0.450316301703163, "grad_norm": 1.2916062658810665, "learning_rate": 6.040483462977439e-06, "loss": 0.3156, "step": 4627 }, { "epoch": 0.45041362530413626, "grad_norm": 1.392814969203336, "learning_rate": 6.038941760834014e-06, "loss": 0.3315, "step": 4628 }, { "epoch": 0.4505109489051095, "grad_norm": 1.1827916111985008, "learning_rate": 6.037399955450307e-06, "loss": 0.3114, "step": 4629 }, { "epoch": 0.4506082725060827, "grad_norm": 1.2372835875581027, "learning_rate": 6.0358580469795315e-06, "loss": 0.2393, "step": 4630 }, { "epoch": 0.45070559610705596, "grad_norm": 1.988431374029195, "learning_rate": 6.034316035574903e-06, "loss": 0.4706, "step": 4631 }, { "epoch": 0.4508029197080292, "grad_norm": 2.396437018811852, "learning_rate": 6.032773921389655e-06, "loss": 0.4336, "step": 4632 }, { "epoch": 0.4509002433090024, "grad_norm": 1.4280452781145334, "learning_rate": 6.031231704577027e-06, "loss": 0.3378, "step": 4633 }, { "epoch": 0.45099756690997567, "grad_norm": 1.498033506092563, "learning_rate": 6.0296893852902705e-06, "loss": 0.3096, "step": 4634 }, { "epoch": 0.4510948905109489, "grad_norm": 1.2436054701671617, "learning_rate": 6.0281469636826486e-06, "loss": 0.3644, "step": 4635 }, { "epoch": 0.4511922141119221, "grad_norm": 1.5192499758500848, "learning_rate": 6.026604439907429e-06, "loss": 0.3506, "step": 4636 }, { "epoch": 0.45128953771289537, "grad_norm": 1.6928058850470935, "learning_rate": 6.025061814117896e-06, "loss": 0.3323, "step": 4637 }, { "epoch": 0.4513868613138686, "grad_norm": 1.1049977299938791, "learning_rate": 6.023519086467341e-06, "loss": 0.2905, "step": 4638 }, { "epoch": 0.4514841849148418, "grad_norm": 1.4257531568293946, "learning_rate": 6.021976257109064e-06, "loss": 0.308, "step": 4639 }, { "epoch": 0.4515815085158151, "grad_norm": 1.5893181909270777, "learning_rate": 6.020433326196379e-06, "loss": 0.4309, "step": 4640 }, { "epoch": 0.45167883211678833, "grad_norm": 1.1799946568254955, "learning_rate": 6.018890293882607e-06, "loss": 0.3656, "step": 4641 }, { "epoch": 0.45177615571776153, "grad_norm": 1.5076992802780589, "learning_rate": 6.01734716032108e-06, "loss": 0.3004, "step": 4642 }, { "epoch": 0.4518734793187348, "grad_norm": 1.4123734982988385, "learning_rate": 6.015803925665141e-06, "loss": 0.5163, "step": 4643 }, { "epoch": 0.45197080291970804, "grad_norm": 1.718868320900687, "learning_rate": 6.014260590068142e-06, "loss": 0.484, "step": 4644 }, { "epoch": 0.4520681265206813, "grad_norm": 1.6108627109575375, "learning_rate": 6.012717153683443e-06, "loss": 0.5274, "step": 4645 }, { "epoch": 0.4521654501216545, "grad_norm": 1.1731609155667762, "learning_rate": 6.0111736166644196e-06, "loss": 0.3736, "step": 4646 }, { "epoch": 0.45226277372262774, "grad_norm": 1.2623844811494913, "learning_rate": 6.009629979164451e-06, "loss": 0.2155, "step": 4647 }, { "epoch": 0.452360097323601, "grad_norm": 1.42235749829862, "learning_rate": 6.0080862413369324e-06, "loss": 0.3683, "step": 4648 }, { "epoch": 0.4524574209245742, "grad_norm": 1.3023936041870583, "learning_rate": 6.006542403335263e-06, "loss": 0.3659, "step": 4649 }, { "epoch": 0.45255474452554745, "grad_norm": 1.52765919581587, "learning_rate": 6.004998465312857e-06, "loss": 0.5769, "step": 4650 }, { "epoch": 0.4526520681265207, "grad_norm": 1.5719134771593801, "learning_rate": 6.003454427423135e-06, "loss": 0.5393, "step": 4651 }, { "epoch": 0.4527493917274939, "grad_norm": 1.3672396404678124, "learning_rate": 6.00191028981953e-06, "loss": 0.4496, "step": 4652 }, { "epoch": 0.45284671532846715, "grad_norm": 1.5536820117072887, "learning_rate": 6.000366052655485e-06, "loss": 0.4263, "step": 4653 }, { "epoch": 0.4529440389294404, "grad_norm": 1.634041647530894, "learning_rate": 5.99882171608445e-06, "loss": 0.3762, "step": 4654 }, { "epoch": 0.4530413625304136, "grad_norm": 1.388217985979722, "learning_rate": 5.997277280259886e-06, "loss": 0.4541, "step": 4655 }, { "epoch": 0.45313868613138686, "grad_norm": 1.3776033638252942, "learning_rate": 5.9957327453352655e-06, "loss": 0.4501, "step": 4656 }, { "epoch": 0.4532360097323601, "grad_norm": 1.195712951201049, "learning_rate": 5.994188111464072e-06, "loss": 0.219, "step": 4657 }, { "epoch": 0.4533333333333333, "grad_norm": 1.511034512487341, "learning_rate": 5.992643378799794e-06, "loss": 0.3969, "step": 4658 }, { "epoch": 0.45343065693430656, "grad_norm": 3.725563795191409, "learning_rate": 5.991098547495933e-06, "loss": 0.6012, "step": 4659 }, { "epoch": 0.4535279805352798, "grad_norm": 1.5248535617497452, "learning_rate": 5.989553617706e-06, "loss": 0.4387, "step": 4660 }, { "epoch": 0.453625304136253, "grad_norm": 1.5028785265567297, "learning_rate": 5.988008589583516e-06, "loss": 0.3756, "step": 4661 }, { "epoch": 0.45372262773722627, "grad_norm": 1.2276140795525827, "learning_rate": 5.9864634632820115e-06, "loss": 0.4242, "step": 4662 }, { "epoch": 0.4538199513381995, "grad_norm": 1.6639607716301263, "learning_rate": 5.984918238955025e-06, "loss": 0.3579, "step": 4663 }, { "epoch": 0.4539172749391728, "grad_norm": 1.4925341777626235, "learning_rate": 5.98337291675611e-06, "loss": 0.5927, "step": 4664 }, { "epoch": 0.454014598540146, "grad_norm": 1.4464693955138501, "learning_rate": 5.9818274968388225e-06, "loss": 0.4453, "step": 4665 }, { "epoch": 0.45411192214111923, "grad_norm": 1.4591826645028443, "learning_rate": 5.980281979356732e-06, "loss": 0.479, "step": 4666 }, { "epoch": 0.4542092457420925, "grad_norm": 1.2111037109500833, "learning_rate": 5.97873636446342e-06, "loss": 0.3399, "step": 4667 }, { "epoch": 0.4543065693430657, "grad_norm": 1.531131116962134, "learning_rate": 5.977190652312474e-06, "loss": 0.4434, "step": 4668 }, { "epoch": 0.45440389294403893, "grad_norm": 1.4656116964879211, "learning_rate": 5.975644843057492e-06, "loss": 0.2733, "step": 4669 }, { "epoch": 0.4545012165450122, "grad_norm": 1.2977483175873437, "learning_rate": 5.974098936852083e-06, "loss": 0.3621, "step": 4670 }, { "epoch": 0.4545985401459854, "grad_norm": 1.8005773466107835, "learning_rate": 5.9725529338498625e-06, "loss": 0.6883, "step": 4671 }, { "epoch": 0.45469586374695864, "grad_norm": 1.342052274382292, "learning_rate": 5.9710068342044595e-06, "loss": 0.2238, "step": 4672 }, { "epoch": 0.4547931873479319, "grad_norm": 1.4839427117757193, "learning_rate": 5.969460638069512e-06, "loss": 0.5407, "step": 4673 }, { "epoch": 0.4548905109489051, "grad_norm": 1.5533686272182006, "learning_rate": 5.967914345598663e-06, "loss": 0.4274, "step": 4674 }, { "epoch": 0.45498783454987834, "grad_norm": 1.4488756951086967, "learning_rate": 5.966367956945572e-06, "loss": 0.4039, "step": 4675 }, { "epoch": 0.4550851581508516, "grad_norm": 1.3695980945732107, "learning_rate": 5.964821472263903e-06, "loss": 0.3252, "step": 4676 }, { "epoch": 0.4551824817518248, "grad_norm": 1.3430505750937012, "learning_rate": 5.96327489170733e-06, "loss": 0.3255, "step": 4677 }, { "epoch": 0.45527980535279805, "grad_norm": 1.4850760962515406, "learning_rate": 5.96172821542954e-06, "loss": 0.3645, "step": 4678 }, { "epoch": 0.4553771289537713, "grad_norm": 1.428325734127547, "learning_rate": 5.960181443584226e-06, "loss": 0.4561, "step": 4679 }, { "epoch": 0.4554744525547445, "grad_norm": 1.2874387745049296, "learning_rate": 5.958634576325093e-06, "loss": 0.3419, "step": 4680 }, { "epoch": 0.45557177615571776, "grad_norm": 1.5518515326284563, "learning_rate": 5.957087613805851e-06, "loss": 0.6531, "step": 4681 }, { "epoch": 0.455669099756691, "grad_norm": 1.4272935345883002, "learning_rate": 5.955540556180225e-06, "loss": 0.3665, "step": 4682 }, { "epoch": 0.4557664233576642, "grad_norm": 1.4818369800930495, "learning_rate": 5.9539934036019465e-06, "loss": 0.3336, "step": 4683 }, { "epoch": 0.45586374695863746, "grad_norm": 1.374483488345197, "learning_rate": 5.952446156224759e-06, "loss": 0.2611, "step": 4684 }, { "epoch": 0.4559610705596107, "grad_norm": 1.5687184759977315, "learning_rate": 5.950898814202408e-06, "loss": 0.5522, "step": 4685 }, { "epoch": 0.45605839416058397, "grad_norm": 1.7181432866926953, "learning_rate": 5.94935137768866e-06, "loss": 0.7505, "step": 4686 }, { "epoch": 0.45615571776155717, "grad_norm": 1.564740824825937, "learning_rate": 5.94780384683728e-06, "loss": 0.3494, "step": 4687 }, { "epoch": 0.4562530413625304, "grad_norm": 1.1399923163707189, "learning_rate": 5.946256221802052e-06, "loss": 0.3039, "step": 4688 }, { "epoch": 0.4563503649635037, "grad_norm": 1.55416378228923, "learning_rate": 5.94470850273676e-06, "loss": 0.4167, "step": 4689 }, { "epoch": 0.45644768856447687, "grad_norm": 1.100399512653527, "learning_rate": 5.943160689795204e-06, "loss": 0.2567, "step": 4690 }, { "epoch": 0.4565450121654501, "grad_norm": 1.6707898477554108, "learning_rate": 5.941612783131191e-06, "loss": 0.4441, "step": 4691 }, { "epoch": 0.4566423357664234, "grad_norm": 1.3812763712165028, "learning_rate": 5.940064782898535e-06, "loss": 0.4095, "step": 4692 }, { "epoch": 0.4567396593673966, "grad_norm": 1.401481979932014, "learning_rate": 5.938516689251065e-06, "loss": 0.278, "step": 4693 }, { "epoch": 0.45683698296836983, "grad_norm": 1.450856560892366, "learning_rate": 5.936968502342614e-06, "loss": 0.333, "step": 4694 }, { "epoch": 0.4569343065693431, "grad_norm": 1.499290298416481, "learning_rate": 5.935420222327028e-06, "loss": 0.4097, "step": 4695 }, { "epoch": 0.4570316301703163, "grad_norm": 1.2903930270860955, "learning_rate": 5.933871849358159e-06, "loss": 0.4401, "step": 4696 }, { "epoch": 0.45712895377128954, "grad_norm": 1.6502016580057606, "learning_rate": 5.93232338358987e-06, "loss": 0.5137, "step": 4697 }, { "epoch": 0.4572262773722628, "grad_norm": 1.9160637805731742, "learning_rate": 5.930774825176034e-06, "loss": 0.35, "step": 4698 }, { "epoch": 0.457323600973236, "grad_norm": 1.3674680149584886, "learning_rate": 5.9292261742705315e-06, "loss": 0.5852, "step": 4699 }, { "epoch": 0.45742092457420924, "grad_norm": 1.2672307224639892, "learning_rate": 5.927677431027253e-06, "loss": 0.326, "step": 4700 }, { "epoch": 0.4575182481751825, "grad_norm": 1.1561098271538528, "learning_rate": 5.926128595600098e-06, "loss": 0.28, "step": 4701 }, { "epoch": 0.4576155717761557, "grad_norm": 1.712247582091781, "learning_rate": 5.9245796681429744e-06, "loss": 0.3832, "step": 4702 }, { "epoch": 0.45771289537712895, "grad_norm": 1.2239785645889314, "learning_rate": 5.923030648809801e-06, "loss": 0.3703, "step": 4703 }, { "epoch": 0.4578102189781022, "grad_norm": 1.3056423325253275, "learning_rate": 5.921481537754505e-06, "loss": 0.2638, "step": 4704 }, { "epoch": 0.4579075425790754, "grad_norm": 1.3444712278011954, "learning_rate": 5.919932335131022e-06, "loss": 0.4296, "step": 4705 }, { "epoch": 0.45800486618004865, "grad_norm": 1.3515867550828926, "learning_rate": 5.918383041093299e-06, "loss": 0.2354, "step": 4706 }, { "epoch": 0.4581021897810219, "grad_norm": 1.6942273019928202, "learning_rate": 5.916833655795287e-06, "loss": 0.5158, "step": 4707 }, { "epoch": 0.45819951338199516, "grad_norm": 1.4532183545386486, "learning_rate": 5.915284179390951e-06, "loss": 0.3856, "step": 4708 }, { "epoch": 0.45829683698296836, "grad_norm": 1.1775832957066483, "learning_rate": 5.9137346120342655e-06, "loss": 0.3254, "step": 4709 }, { "epoch": 0.4583941605839416, "grad_norm": 1.386752147003762, "learning_rate": 5.912184953879207e-06, "loss": 0.423, "step": 4710 }, { "epoch": 0.45849148418491487, "grad_norm": 1.1159458329512677, "learning_rate": 5.910635205079772e-06, "loss": 0.2296, "step": 4711 }, { "epoch": 0.45858880778588806, "grad_norm": 1.4813859257522484, "learning_rate": 5.909085365789955e-06, "loss": 0.5599, "step": 4712 }, { "epoch": 0.4586861313868613, "grad_norm": 1.3715052842926014, "learning_rate": 5.907535436163767e-06, "loss": 0.3495, "step": 4713 }, { "epoch": 0.45878345498783457, "grad_norm": 1.4981944025202203, "learning_rate": 5.905985416355225e-06, "loss": 0.3131, "step": 4714 }, { "epoch": 0.45888077858880777, "grad_norm": 1.6325759351282747, "learning_rate": 5.904435306518354e-06, "loss": 0.6115, "step": 4715 }, { "epoch": 0.458978102189781, "grad_norm": 1.7392502730482995, "learning_rate": 5.902885106807193e-06, "loss": 0.5174, "step": 4716 }, { "epoch": 0.4590754257907543, "grad_norm": 1.3934187525403232, "learning_rate": 5.901334817375782e-06, "loss": 0.4157, "step": 4717 }, { "epoch": 0.4591727493917275, "grad_norm": 1.5194400867877766, "learning_rate": 5.899784438378177e-06, "loss": 0.4937, "step": 4718 }, { "epoch": 0.45927007299270073, "grad_norm": 1.2421749762137688, "learning_rate": 5.898233969968439e-06, "loss": 0.389, "step": 4719 }, { "epoch": 0.459367396593674, "grad_norm": 1.2968529389125105, "learning_rate": 5.89668341230064e-06, "loss": 0.4248, "step": 4720 }, { "epoch": 0.4594647201946472, "grad_norm": 1.7127495493366949, "learning_rate": 5.895132765528858e-06, "loss": 0.3465, "step": 4721 }, { "epoch": 0.45956204379562043, "grad_norm": 1.4977326741267858, "learning_rate": 5.893582029807184e-06, "loss": 0.4816, "step": 4722 }, { "epoch": 0.4596593673965937, "grad_norm": 1.4970528737823605, "learning_rate": 5.892031205289714e-06, "loss": 0.3091, "step": 4723 }, { "epoch": 0.4597566909975669, "grad_norm": 2.0058564034712045, "learning_rate": 5.890480292130555e-06, "loss": 0.5912, "step": 4724 }, { "epoch": 0.45985401459854014, "grad_norm": 1.7918745703907915, "learning_rate": 5.888929290483822e-06, "loss": 0.3417, "step": 4725 }, { "epoch": 0.4599513381995134, "grad_norm": 1.572239694322271, "learning_rate": 5.887378200503639e-06, "loss": 0.4157, "step": 4726 }, { "epoch": 0.4600486618004866, "grad_norm": 1.5704926344763475, "learning_rate": 5.88582702234414e-06, "loss": 0.2702, "step": 4727 }, { "epoch": 0.46014598540145984, "grad_norm": 1.5454666971045126, "learning_rate": 5.8842757561594636e-06, "loss": 0.5971, "step": 4728 }, { "epoch": 0.4602433090024331, "grad_norm": 1.4922891152819104, "learning_rate": 5.882724402103762e-06, "loss": 0.3751, "step": 4729 }, { "epoch": 0.46034063260340635, "grad_norm": 2.476920598371043, "learning_rate": 5.881172960331194e-06, "loss": 0.2977, "step": 4730 }, { "epoch": 0.46043795620437955, "grad_norm": 1.2731556024557937, "learning_rate": 5.879621430995927e-06, "loss": 0.3484, "step": 4731 }, { "epoch": 0.4605352798053528, "grad_norm": 1.1937076541120601, "learning_rate": 5.8780698142521385e-06, "loss": 0.3455, "step": 4732 }, { "epoch": 0.46063260340632606, "grad_norm": 1.3845968856553887, "learning_rate": 5.8765181102540136e-06, "loss": 0.368, "step": 4733 }, { "epoch": 0.46072992700729926, "grad_norm": 1.2118541078699578, "learning_rate": 5.874966319155744e-06, "loss": 0.3384, "step": 4734 }, { "epoch": 0.4608272506082725, "grad_norm": 1.3623706651249927, "learning_rate": 5.873414441111534e-06, "loss": 0.2656, "step": 4735 }, { "epoch": 0.46092457420924576, "grad_norm": 1.4140159641427623, "learning_rate": 5.871862476275595e-06, "loss": 0.3252, "step": 4736 }, { "epoch": 0.46102189781021896, "grad_norm": 1.2601663114899302, "learning_rate": 5.870310424802144e-06, "loss": 0.3305, "step": 4737 }, { "epoch": 0.4611192214111922, "grad_norm": 1.530399475555011, "learning_rate": 5.868758286845413e-06, "loss": 0.524, "step": 4738 }, { "epoch": 0.46121654501216547, "grad_norm": 1.4374656673146151, "learning_rate": 5.867206062559636e-06, "loss": 0.444, "step": 4739 }, { "epoch": 0.46131386861313867, "grad_norm": 1.3525900930975647, "learning_rate": 5.865653752099058e-06, "loss": 0.3673, "step": 4740 }, { "epoch": 0.4614111922141119, "grad_norm": 1.138213652709309, "learning_rate": 5.864101355617937e-06, "loss": 0.2852, "step": 4741 }, { "epoch": 0.4615085158150852, "grad_norm": 1.855979768753775, "learning_rate": 5.862548873270533e-06, "loss": 0.7494, "step": 4742 }, { "epoch": 0.46160583941605837, "grad_norm": 1.298570742237486, "learning_rate": 5.860996305211116e-06, "loss": 0.31, "step": 4743 }, { "epoch": 0.4617031630170316, "grad_norm": 1.3800618891728689, "learning_rate": 5.859443651593968e-06, "loss": 0.3635, "step": 4744 }, { "epoch": 0.4618004866180049, "grad_norm": 1.1578742295256148, "learning_rate": 5.8578909125733764e-06, "loss": 0.2873, "step": 4745 }, { "epoch": 0.4618978102189781, "grad_norm": 1.3754807845437125, "learning_rate": 5.856338088303636e-06, "loss": 0.4375, "step": 4746 }, { "epoch": 0.46199513381995133, "grad_norm": 1.3080329262616794, "learning_rate": 5.854785178939054e-06, "loss": 0.4391, "step": 4747 }, { "epoch": 0.4620924574209246, "grad_norm": 1.4965704813226828, "learning_rate": 5.853232184633943e-06, "loss": 0.5334, "step": 4748 }, { "epoch": 0.4621897810218978, "grad_norm": 2.2958549258491168, "learning_rate": 5.851679105542627e-06, "loss": 0.54, "step": 4749 }, { "epoch": 0.46228710462287104, "grad_norm": 1.31437240738189, "learning_rate": 5.850125941819433e-06, "loss": 0.2722, "step": 4750 }, { "epoch": 0.4623844282238443, "grad_norm": 1.2323280455936996, "learning_rate": 5.848572693618703e-06, "loss": 0.3704, "step": 4751 }, { "epoch": 0.46248175182481754, "grad_norm": 1.4382237809866574, "learning_rate": 5.8470193610947825e-06, "loss": 0.4253, "step": 4752 }, { "epoch": 0.46257907542579074, "grad_norm": 1.5270857727253344, "learning_rate": 5.8454659444020276e-06, "loss": 0.528, "step": 4753 }, { "epoch": 0.462676399026764, "grad_norm": 1.339534089808389, "learning_rate": 5.843912443694802e-06, "loss": 0.4199, "step": 4754 }, { "epoch": 0.46277372262773725, "grad_norm": 1.6462659951188234, "learning_rate": 5.8423588591274786e-06, "loss": 0.198, "step": 4755 }, { "epoch": 0.46287104622871045, "grad_norm": 1.46963555443937, "learning_rate": 5.8408051908544365e-06, "loss": 0.3966, "step": 4756 }, { "epoch": 0.4629683698296837, "grad_norm": 1.3757584820693338, "learning_rate": 5.8392514390300644e-06, "loss": 0.4127, "step": 4757 }, { "epoch": 0.46306569343065695, "grad_norm": 1.5854642565983017, "learning_rate": 5.837697603808764e-06, "loss": 0.3674, "step": 4758 }, { "epoch": 0.46316301703163015, "grad_norm": 1.4058604230609197, "learning_rate": 5.836143685344937e-06, "loss": 0.1736, "step": 4759 }, { "epoch": 0.4632603406326034, "grad_norm": 3.188656914883977, "learning_rate": 5.834589683792998e-06, "loss": 0.4361, "step": 4760 }, { "epoch": 0.46335766423357666, "grad_norm": 1.277741209948926, "learning_rate": 5.833035599307367e-06, "loss": 0.2996, "step": 4761 }, { "epoch": 0.46345498783454986, "grad_norm": 1.5163201379867834, "learning_rate": 5.831481432042479e-06, "loss": 0.579, "step": 4762 }, { "epoch": 0.4635523114355231, "grad_norm": 1.480130924542566, "learning_rate": 5.8299271821527704e-06, "loss": 0.4744, "step": 4763 }, { "epoch": 0.46364963503649637, "grad_norm": 1.5295331243873154, "learning_rate": 5.8283728497926865e-06, "loss": 0.4863, "step": 4764 }, { "epoch": 0.46374695863746956, "grad_norm": 1.7381638390825362, "learning_rate": 5.826818435116684e-06, "loss": 0.5357, "step": 4765 }, { "epoch": 0.4638442822384428, "grad_norm": 1.4226409190659568, "learning_rate": 5.825263938279223e-06, "loss": 0.3567, "step": 4766 }, { "epoch": 0.46394160583941607, "grad_norm": 1.3756233016354147, "learning_rate": 5.823709359434779e-06, "loss": 0.4331, "step": 4767 }, { "epoch": 0.46403892944038927, "grad_norm": 1.4866340713320438, "learning_rate": 5.82215469873783e-06, "loss": 0.291, "step": 4768 }, { "epoch": 0.4641362530413625, "grad_norm": 1.4247021916044675, "learning_rate": 5.820599956342864e-06, "loss": 0.2503, "step": 4769 }, { "epoch": 0.4642335766423358, "grad_norm": 1.514852346520823, "learning_rate": 5.819045132404374e-06, "loss": 0.4879, "step": 4770 }, { "epoch": 0.464330900243309, "grad_norm": 1.1723389787867933, "learning_rate": 5.8174902270768666e-06, "loss": 0.3285, "step": 4771 }, { "epoch": 0.46442822384428223, "grad_norm": 1.6214649189163381, "learning_rate": 5.8159352405148525e-06, "loss": 0.6075, "step": 4772 }, { "epoch": 0.4645255474452555, "grad_norm": 1.5229859294178767, "learning_rate": 5.814380172872853e-06, "loss": 0.448, "step": 4773 }, { "epoch": 0.46462287104622874, "grad_norm": 1.3925630159347562, "learning_rate": 5.812825024305395e-06, "loss": 0.3614, "step": 4774 }, { "epoch": 0.46472019464720193, "grad_norm": 1.2947793597339534, "learning_rate": 5.8112697949670135e-06, "loss": 0.3463, "step": 4775 }, { "epoch": 0.4648175182481752, "grad_norm": 1.5942348415927672, "learning_rate": 5.809714485012254e-06, "loss": 0.3919, "step": 4776 }, { "epoch": 0.46491484184914844, "grad_norm": 1.89133736297067, "learning_rate": 5.808159094595669e-06, "loss": 0.2706, "step": 4777 }, { "epoch": 0.46501216545012164, "grad_norm": 1.4231638673643952, "learning_rate": 5.806603623871819e-06, "loss": 0.4138, "step": 4778 }, { "epoch": 0.4651094890510949, "grad_norm": 1.7206309343451145, "learning_rate": 5.80504807299527e-06, "loss": 0.403, "step": 4779 }, { "epoch": 0.46520681265206815, "grad_norm": 1.359607983497329, "learning_rate": 5.8034924421206e-06, "loss": 0.3557, "step": 4780 }, { "epoch": 0.46530413625304134, "grad_norm": 1.2100118729505402, "learning_rate": 5.801936731402392e-06, "loss": 0.3914, "step": 4781 }, { "epoch": 0.4654014598540146, "grad_norm": 1.5790450180961257, "learning_rate": 5.800380940995236e-06, "loss": 0.403, "step": 4782 }, { "epoch": 0.46549878345498785, "grad_norm": 1.1697128695101966, "learning_rate": 5.798825071053738e-06, "loss": 0.3045, "step": 4783 }, { "epoch": 0.46559610705596105, "grad_norm": 1.5814625412505545, "learning_rate": 5.7972691217324985e-06, "loss": 0.6549, "step": 4784 }, { "epoch": 0.4656934306569343, "grad_norm": 1.4855521102439881, "learning_rate": 5.795713093186137e-06, "loss": 0.4597, "step": 4785 }, { "epoch": 0.46579075425790756, "grad_norm": 1.059777361772832, "learning_rate": 5.794156985569276e-06, "loss": 0.3115, "step": 4786 }, { "epoch": 0.46588807785888076, "grad_norm": 1.5462466418292278, "learning_rate": 5.792600799036547e-06, "loss": 0.4384, "step": 4787 }, { "epoch": 0.465985401459854, "grad_norm": 1.34648475038298, "learning_rate": 5.79104453374259e-06, "loss": 0.2994, "step": 4788 }, { "epoch": 0.46608272506082726, "grad_norm": 1.3342291633904535, "learning_rate": 5.789488189842053e-06, "loss": 0.3545, "step": 4789 }, { "epoch": 0.46618004866180046, "grad_norm": 1.347617814994831, "learning_rate": 5.787931767489588e-06, "loss": 0.4196, "step": 4790 }, { "epoch": 0.4662773722627737, "grad_norm": 1.394591685462805, "learning_rate": 5.786375266839859e-06, "loss": 0.4832, "step": 4791 }, { "epoch": 0.46637469586374697, "grad_norm": 1.5611341992108034, "learning_rate": 5.784818688047536e-06, "loss": 0.4155, "step": 4792 }, { "epoch": 0.4664720194647202, "grad_norm": 1.3150159699228137, "learning_rate": 5.7832620312672975e-06, "loss": 0.4826, "step": 4793 }, { "epoch": 0.4665693430656934, "grad_norm": 1.4168733090801606, "learning_rate": 5.7817052966538304e-06, "loss": 0.5195, "step": 4794 }, { "epoch": 0.4666666666666667, "grad_norm": 1.300365897184467, "learning_rate": 5.780148484361826e-06, "loss": 0.3441, "step": 4795 }, { "epoch": 0.4667639902676399, "grad_norm": 1.3810574013218457, "learning_rate": 5.778591594545989e-06, "loss": 0.3596, "step": 4796 }, { "epoch": 0.4668613138686131, "grad_norm": 1.4598916946372429, "learning_rate": 5.777034627361025e-06, "loss": 0.3572, "step": 4797 }, { "epoch": 0.4669586374695864, "grad_norm": 1.4837209766509183, "learning_rate": 5.775477582961653e-06, "loss": 0.342, "step": 4798 }, { "epoch": 0.46705596107055963, "grad_norm": 1.6607214111550288, "learning_rate": 5.7739204615025975e-06, "loss": 0.3814, "step": 4799 }, { "epoch": 0.46715328467153283, "grad_norm": 1.652182568924618, "learning_rate": 5.772363263138589e-06, "loss": 0.5886, "step": 4800 }, { "epoch": 0.4672506082725061, "grad_norm": 1.1429975734160958, "learning_rate": 5.770805988024371e-06, "loss": 0.3413, "step": 4801 }, { "epoch": 0.46734793187347934, "grad_norm": 1.432321009479854, "learning_rate": 5.769248636314686e-06, "loss": 0.386, "step": 4802 }, { "epoch": 0.46744525547445254, "grad_norm": 1.3423489652240848, "learning_rate": 5.767691208164291e-06, "loss": 0.3524, "step": 4803 }, { "epoch": 0.4675425790754258, "grad_norm": 1.2135395273781886, "learning_rate": 5.766133703727948e-06, "loss": 0.3286, "step": 4804 }, { "epoch": 0.46763990267639904, "grad_norm": 1.4955897665118012, "learning_rate": 5.76457612316043e-06, "loss": 0.5215, "step": 4805 }, { "epoch": 0.46773722627737224, "grad_norm": 1.3141062762104827, "learning_rate": 5.7630184666165125e-06, "loss": 0.3436, "step": 4806 }, { "epoch": 0.4678345498783455, "grad_norm": 1.7784427808040708, "learning_rate": 5.761460734250981e-06, "loss": 0.3238, "step": 4807 }, { "epoch": 0.46793187347931875, "grad_norm": 1.4663382675516843, "learning_rate": 5.759902926218627e-06, "loss": 0.4586, "step": 4808 }, { "epoch": 0.46802919708029195, "grad_norm": 1.3903432410612793, "learning_rate": 5.758345042674253e-06, "loss": 0.3347, "step": 4809 }, { "epoch": 0.4681265206812652, "grad_norm": 1.1531113993351785, "learning_rate": 5.7567870837726655e-06, "loss": 0.296, "step": 4810 }, { "epoch": 0.46822384428223845, "grad_norm": 1.5297628693915974, "learning_rate": 5.755229049668681e-06, "loss": 0.4743, "step": 4811 }, { "epoch": 0.46832116788321165, "grad_norm": 2.2740622379774096, "learning_rate": 5.753670940517122e-06, "loss": 0.2528, "step": 4812 }, { "epoch": 0.4684184914841849, "grad_norm": 1.3345646636320794, "learning_rate": 5.752112756472818e-06, "loss": 0.4653, "step": 4813 }, { "epoch": 0.46851581508515816, "grad_norm": 1.357079200578268, "learning_rate": 5.7505544976906055e-06, "loss": 0.4277, "step": 4814 }, { "epoch": 0.4686131386861314, "grad_norm": 1.3251376944197377, "learning_rate": 5.748996164325332e-06, "loss": 0.3701, "step": 4815 }, { "epoch": 0.4687104622871046, "grad_norm": 1.2991457443232788, "learning_rate": 5.747437756531851e-06, "loss": 0.4338, "step": 4816 }, { "epoch": 0.46880778588807787, "grad_norm": 1.596269822083475, "learning_rate": 5.7458792744650206e-06, "loss": 0.6416, "step": 4817 }, { "epoch": 0.4689051094890511, "grad_norm": 2.0883872973184534, "learning_rate": 5.7443207182797066e-06, "loss": 0.4152, "step": 4818 }, { "epoch": 0.4690024330900243, "grad_norm": 1.6561593084876625, "learning_rate": 5.742762088130785e-06, "loss": 0.3861, "step": 4819 }, { "epoch": 0.46909975669099757, "grad_norm": 1.2816432721483586, "learning_rate": 5.741203384173139e-06, "loss": 0.3791, "step": 4820 }, { "epoch": 0.4691970802919708, "grad_norm": 1.2696822326085209, "learning_rate": 5.7396446065616585e-06, "loss": 0.3685, "step": 4821 }, { "epoch": 0.469294403892944, "grad_norm": 1.5827454261840233, "learning_rate": 5.738085755451237e-06, "loss": 0.5133, "step": 4822 }, { "epoch": 0.4693917274939173, "grad_norm": 1.5409051543697243, "learning_rate": 5.736526830996782e-06, "loss": 0.3443, "step": 4823 }, { "epoch": 0.46948905109489053, "grad_norm": 1.7383513361485372, "learning_rate": 5.734967833353201e-06, "loss": 0.2957, "step": 4824 }, { "epoch": 0.46958637469586373, "grad_norm": 1.1363874129495541, "learning_rate": 5.733408762675415e-06, "loss": 0.3485, "step": 4825 }, { "epoch": 0.469683698296837, "grad_norm": 1.191790759465704, "learning_rate": 5.73184961911835e-06, "loss": 0.3427, "step": 4826 }, { "epoch": 0.46978102189781024, "grad_norm": 1.496233184133573, "learning_rate": 5.7302904028369386e-06, "loss": 0.4096, "step": 4827 }, { "epoch": 0.46987834549878343, "grad_norm": 1.8262031430102197, "learning_rate": 5.728731113986122e-06, "loss": 0.6644, "step": 4828 }, { "epoch": 0.4699756690997567, "grad_norm": 1.4064090214992204, "learning_rate": 5.727171752720846e-06, "loss": 0.3706, "step": 4829 }, { "epoch": 0.47007299270072994, "grad_norm": 1.411066158000839, "learning_rate": 5.725612319196065e-06, "loss": 0.3495, "step": 4830 }, { "epoch": 0.47017031630170314, "grad_norm": 1.7578993034123143, "learning_rate": 5.724052813566742e-06, "loss": 0.4626, "step": 4831 }, { "epoch": 0.4702676399026764, "grad_norm": 1.412637867571139, "learning_rate": 5.722493235987847e-06, "loss": 0.329, "step": 4832 }, { "epoch": 0.47036496350364965, "grad_norm": 1.3267675306485687, "learning_rate": 5.720933586614355e-06, "loss": 0.401, "step": 4833 }, { "epoch": 0.47046228710462284, "grad_norm": 1.5867750089317443, "learning_rate": 5.719373865601249e-06, "loss": 0.5137, "step": 4834 }, { "epoch": 0.4705596107055961, "grad_norm": 1.4410510775681757, "learning_rate": 5.7178140731035195e-06, "loss": 0.3148, "step": 4835 }, { "epoch": 0.47065693430656935, "grad_norm": 1.1879705199620183, "learning_rate": 5.716254209276163e-06, "loss": 0.3546, "step": 4836 }, { "epoch": 0.4707542579075426, "grad_norm": 1.419039571510067, "learning_rate": 5.714694274274189e-06, "loss": 0.4785, "step": 4837 }, { "epoch": 0.4708515815085158, "grad_norm": 1.3906630607282704, "learning_rate": 5.713134268252603e-06, "loss": 0.4719, "step": 4838 }, { "epoch": 0.47094890510948906, "grad_norm": 1.5072844376178671, "learning_rate": 5.711574191366427e-06, "loss": 0.5591, "step": 4839 }, { "epoch": 0.4710462287104623, "grad_norm": 1.4731496293052737, "learning_rate": 5.710014043770686e-06, "loss": 0.4348, "step": 4840 }, { "epoch": 0.4711435523114355, "grad_norm": 1.4626012294328556, "learning_rate": 5.708453825620413e-06, "loss": 0.3738, "step": 4841 }, { "epoch": 0.47124087591240876, "grad_norm": 1.3952035859287184, "learning_rate": 5.706893537070648e-06, "loss": 0.3287, "step": 4842 }, { "epoch": 0.471338199513382, "grad_norm": 1.2682325087652746, "learning_rate": 5.705333178276439e-06, "loss": 0.3744, "step": 4843 }, { "epoch": 0.4714355231143552, "grad_norm": 1.3258810047392384, "learning_rate": 5.7037727493928374e-06, "loss": 0.4449, "step": 4844 }, { "epoch": 0.47153284671532847, "grad_norm": 1.0974041523713896, "learning_rate": 5.702212250574905e-06, "loss": 0.3161, "step": 4845 }, { "epoch": 0.4716301703163017, "grad_norm": 1.6106032341998873, "learning_rate": 5.7006516819777105e-06, "loss": 0.4791, "step": 4846 }, { "epoch": 0.4717274939172749, "grad_norm": 1.4543270758588145, "learning_rate": 5.699091043756326e-06, "loss": 0.4122, "step": 4847 }, { "epoch": 0.4718248175182482, "grad_norm": 1.1669639424373461, "learning_rate": 5.697530336065837e-06, "loss": 0.3915, "step": 4848 }, { "epoch": 0.4719221411192214, "grad_norm": 1.7484002294309418, "learning_rate": 5.695969559061328e-06, "loss": 0.6337, "step": 4849 }, { "epoch": 0.4720194647201946, "grad_norm": 1.1084749035779666, "learning_rate": 5.694408712897898e-06, "loss": 0.2675, "step": 4850 }, { "epoch": 0.4721167883211679, "grad_norm": 1.7836386743035124, "learning_rate": 5.692847797730644e-06, "loss": 0.2981, "step": 4851 }, { "epoch": 0.47221411192214113, "grad_norm": 1.3478942218702235, "learning_rate": 5.691286813714682e-06, "loss": 0.3685, "step": 4852 }, { "epoch": 0.47231143552311433, "grad_norm": 1.4550135655770304, "learning_rate": 5.6897257610051225e-06, "loss": 0.2745, "step": 4853 }, { "epoch": 0.4724087591240876, "grad_norm": 1.4982703090789, "learning_rate": 5.688164639757091e-06, "loss": 0.1822, "step": 4854 }, { "epoch": 0.47250608272506084, "grad_norm": 1.4542555154292847, "learning_rate": 5.686603450125717e-06, "loss": 0.4963, "step": 4855 }, { "epoch": 0.47260340632603404, "grad_norm": 1.3239793589213282, "learning_rate": 5.685042192266134e-06, "loss": 0.4562, "step": 4856 }, { "epoch": 0.4727007299270073, "grad_norm": 1.5861723026037755, "learning_rate": 5.683480866333489e-06, "loss": 0.502, "step": 4857 }, { "epoch": 0.47279805352798054, "grad_norm": 1.5279472700177954, "learning_rate": 5.68191947248293e-06, "loss": 0.482, "step": 4858 }, { "epoch": 0.4728953771289538, "grad_norm": 1.6153320873369572, "learning_rate": 5.680358010869613e-06, "loss": 0.4353, "step": 4859 }, { "epoch": 0.472992700729927, "grad_norm": 1.348796505837762, "learning_rate": 5.678796481648703e-06, "loss": 0.3374, "step": 4860 }, { "epoch": 0.47309002433090025, "grad_norm": 1.4866060856733978, "learning_rate": 5.677234884975369e-06, "loss": 0.3824, "step": 4861 }, { "epoch": 0.4731873479318735, "grad_norm": 1.5254879968021549, "learning_rate": 5.675673221004788e-06, "loss": 0.449, "step": 4862 }, { "epoch": 0.4732846715328467, "grad_norm": 1.3671322184694132, "learning_rate": 5.674111489892144e-06, "loss": 0.4025, "step": 4863 }, { "epoch": 0.47338199513381995, "grad_norm": 1.0117031965201657, "learning_rate": 5.672549691792629e-06, "loss": 0.2595, "step": 4864 }, { "epoch": 0.4734793187347932, "grad_norm": 1.281972448847432, "learning_rate": 5.670987826861435e-06, "loss": 0.3899, "step": 4865 }, { "epoch": 0.4735766423357664, "grad_norm": 1.3228606064500106, "learning_rate": 5.669425895253769e-06, "loss": 0.3252, "step": 4866 }, { "epoch": 0.47367396593673966, "grad_norm": 1.254391205079037, "learning_rate": 5.66786389712484e-06, "loss": 0.3983, "step": 4867 }, { "epoch": 0.4737712895377129, "grad_norm": 1.2578585598491165, "learning_rate": 5.666301832629866e-06, "loss": 0.3728, "step": 4868 }, { "epoch": 0.4738686131386861, "grad_norm": 1.7176300561518312, "learning_rate": 5.664739701924069e-06, "loss": 0.4013, "step": 4869 }, { "epoch": 0.47396593673965937, "grad_norm": 1.9698213962010664, "learning_rate": 5.663177505162679e-06, "loss": 0.5781, "step": 4870 }, { "epoch": 0.4740632603406326, "grad_norm": 1.4756819102434395, "learning_rate": 5.661615242500933e-06, "loss": 0.3928, "step": 4871 }, { "epoch": 0.4741605839416058, "grad_norm": 1.587339636445031, "learning_rate": 5.660052914094073e-06, "loss": 0.4726, "step": 4872 }, { "epoch": 0.47425790754257907, "grad_norm": 1.5365933978527655, "learning_rate": 5.658490520097351e-06, "loss": 0.4451, "step": 4873 }, { "epoch": 0.4743552311435523, "grad_norm": 1.5917052072915214, "learning_rate": 5.656928060666018e-06, "loss": 0.3587, "step": 4874 }, { "epoch": 0.4744525547445255, "grad_norm": 1.4206774352936657, "learning_rate": 5.655365535955343e-06, "loss": 0.4493, "step": 4875 }, { "epoch": 0.4745498783454988, "grad_norm": 1.340672762004427, "learning_rate": 5.65380294612059e-06, "loss": 0.455, "step": 4876 }, { "epoch": 0.47464720194647203, "grad_norm": 1.3776284584062226, "learning_rate": 5.652240291317037e-06, "loss": 0.3514, "step": 4877 }, { "epoch": 0.47474452554744523, "grad_norm": 1.7446009593383291, "learning_rate": 5.650677571699965e-06, "loss": 0.5945, "step": 4878 }, { "epoch": 0.4748418491484185, "grad_norm": 1.5843637861046846, "learning_rate": 5.6491147874246636e-06, "loss": 0.5681, "step": 4879 }, { "epoch": 0.47493917274939174, "grad_norm": 1.3959518164077136, "learning_rate": 5.647551938646426e-06, "loss": 0.3407, "step": 4880 }, { "epoch": 0.475036496350365, "grad_norm": 1.3839884784582743, "learning_rate": 5.645989025520555e-06, "loss": 0.2746, "step": 4881 }, { "epoch": 0.4751338199513382, "grad_norm": 1.8844954697293113, "learning_rate": 5.644426048202357e-06, "loss": 0.3953, "step": 4882 }, { "epoch": 0.47523114355231144, "grad_norm": 1.2908271501925401, "learning_rate": 5.642863006847146e-06, "loss": 0.3223, "step": 4883 }, { "epoch": 0.4753284671532847, "grad_norm": 1.4328633040445249, "learning_rate": 5.641299901610244e-06, "loss": 0.4966, "step": 4884 }, { "epoch": 0.4754257907542579, "grad_norm": 1.5146698449083573, "learning_rate": 5.639736732646977e-06, "loss": 0.2527, "step": 4885 }, { "epoch": 0.47552311435523115, "grad_norm": 1.3432001275183485, "learning_rate": 5.638173500112676e-06, "loss": 0.4179, "step": 4886 }, { "epoch": 0.4756204379562044, "grad_norm": 1.2322461082999159, "learning_rate": 5.6366102041626825e-06, "loss": 0.3818, "step": 4887 }, { "epoch": 0.4757177615571776, "grad_norm": 1.3767714360266998, "learning_rate": 5.635046844952342e-06, "loss": 0.4691, "step": 4888 }, { "epoch": 0.47581508515815085, "grad_norm": 1.314630958351386, "learning_rate": 5.633483422637005e-06, "loss": 0.4198, "step": 4889 }, { "epoch": 0.4759124087591241, "grad_norm": 1.8890943399906002, "learning_rate": 5.631919937372034e-06, "loss": 0.8231, "step": 4890 }, { "epoch": 0.4760097323600973, "grad_norm": 1.3889867583866484, "learning_rate": 5.6303563893127885e-06, "loss": 0.4546, "step": 4891 }, { "epoch": 0.47610705596107056, "grad_norm": 1.6799912288981764, "learning_rate": 5.62879277861464e-06, "loss": 0.4559, "step": 4892 }, { "epoch": 0.4762043795620438, "grad_norm": 1.4226023731367958, "learning_rate": 5.627229105432968e-06, "loss": 0.2873, "step": 4893 }, { "epoch": 0.476301703163017, "grad_norm": 1.6722706834537924, "learning_rate": 5.6256653699231535e-06, "loss": 0.3074, "step": 4894 }, { "epoch": 0.47639902676399026, "grad_norm": 1.5132051006609315, "learning_rate": 5.624101572240588e-06, "loss": 0.2159, "step": 4895 }, { "epoch": 0.4764963503649635, "grad_norm": 1.4105581624866415, "learning_rate": 5.622537712540664e-06, "loss": 0.3531, "step": 4896 }, { "epoch": 0.4765936739659367, "grad_norm": 1.5128648932544173, "learning_rate": 5.6209737909787864e-06, "loss": 0.583, "step": 4897 }, { "epoch": 0.47669099756690997, "grad_norm": 1.3110258361371465, "learning_rate": 5.619409807710361e-06, "loss": 0.2983, "step": 4898 }, { "epoch": 0.4767883211678832, "grad_norm": 1.297295283960915, "learning_rate": 5.617845762890801e-06, "loss": 0.3841, "step": 4899 }, { "epoch": 0.4768856447688564, "grad_norm": 1.5268473489250836, "learning_rate": 5.616281656675529e-06, "loss": 0.3997, "step": 4900 }, { "epoch": 0.4769829683698297, "grad_norm": 1.1738970794989763, "learning_rate": 5.614717489219969e-06, "loss": 0.3441, "step": 4901 }, { "epoch": 0.4770802919708029, "grad_norm": 1.4550986064519102, "learning_rate": 5.613153260679557e-06, "loss": 0.4218, "step": 4902 }, { "epoch": 0.4771776155717762, "grad_norm": 1.2110212727764387, "learning_rate": 5.611588971209726e-06, "loss": 0.2402, "step": 4903 }, { "epoch": 0.4772749391727494, "grad_norm": 1.341140342133277, "learning_rate": 5.610024620965924e-06, "loss": 0.5273, "step": 4904 }, { "epoch": 0.47737226277372263, "grad_norm": 1.4563750151972352, "learning_rate": 5.608460210103599e-06, "loss": 0.2982, "step": 4905 }, { "epoch": 0.4774695863746959, "grad_norm": 1.4238212595749262, "learning_rate": 5.606895738778211e-06, "loss": 0.4861, "step": 4906 }, { "epoch": 0.4775669099756691, "grad_norm": 1.1703006596087244, "learning_rate": 5.605331207145219e-06, "loss": 0.2687, "step": 4907 }, { "epoch": 0.47766423357664234, "grad_norm": 1.4732598400916934, "learning_rate": 5.603766615360094e-06, "loss": 0.3763, "step": 4908 }, { "epoch": 0.4777615571776156, "grad_norm": 1.2987517140792977, "learning_rate": 5.602201963578308e-06, "loss": 0.3241, "step": 4909 }, { "epoch": 0.4778588807785888, "grad_norm": 1.4014749625096345, "learning_rate": 5.600637251955343e-06, "loss": 0.3606, "step": 4910 }, { "epoch": 0.47795620437956204, "grad_norm": 1.5299494923252082, "learning_rate": 5.599072480646686e-06, "loss": 0.4809, "step": 4911 }, { "epoch": 0.4780535279805353, "grad_norm": 1.5049743030420062, "learning_rate": 5.597507649807828e-06, "loss": 0.552, "step": 4912 }, { "epoch": 0.4781508515815085, "grad_norm": 1.3122576989449315, "learning_rate": 5.595942759594268e-06, "loss": 0.3756, "step": 4913 }, { "epoch": 0.47824817518248175, "grad_norm": 1.2378409291743957, "learning_rate": 5.594377810161509e-06, "loss": 0.3848, "step": 4914 }, { "epoch": 0.478345498783455, "grad_norm": 1.4881152763834071, "learning_rate": 5.592812801665062e-06, "loss": 0.6671, "step": 4915 }, { "epoch": 0.4784428223844282, "grad_norm": 1.083624702787122, "learning_rate": 5.591247734260441e-06, "loss": 0.2953, "step": 4916 }, { "epoch": 0.47854014598540145, "grad_norm": 1.506118534413334, "learning_rate": 5.589682608103172e-06, "loss": 0.5348, "step": 4917 }, { "epoch": 0.4786374695863747, "grad_norm": 1.4154348721520453, "learning_rate": 5.588117423348779e-06, "loss": 0.4859, "step": 4918 }, { "epoch": 0.4787347931873479, "grad_norm": 1.3776882941573374, "learning_rate": 5.586552180152795e-06, "loss": 0.3041, "step": 4919 }, { "epoch": 0.47883211678832116, "grad_norm": 1.4627494794850848, "learning_rate": 5.58498687867076e-06, "loss": 0.5225, "step": 4920 }, { "epoch": 0.4789294403892944, "grad_norm": 1.7558812361768268, "learning_rate": 5.583421519058221e-06, "loss": 0.3869, "step": 4921 }, { "epoch": 0.4790267639902676, "grad_norm": 1.3597510683616933, "learning_rate": 5.5818561014707265e-06, "loss": 0.3343, "step": 4922 }, { "epoch": 0.47912408759124087, "grad_norm": 1.496715844598856, "learning_rate": 5.580290626063833e-06, "loss": 0.6334, "step": 4923 }, { "epoch": 0.4792214111922141, "grad_norm": 1.62323606561308, "learning_rate": 5.578725092993103e-06, "loss": 0.6151, "step": 4924 }, { "epoch": 0.4793187347931874, "grad_norm": 1.3797589224174194, "learning_rate": 5.577159502414105e-06, "loss": 0.3447, "step": 4925 }, { "epoch": 0.47941605839416057, "grad_norm": 1.6225545528980303, "learning_rate": 5.575593854482414e-06, "loss": 0.3989, "step": 4926 }, { "epoch": 0.4795133819951338, "grad_norm": 1.5792786564367016, "learning_rate": 5.574028149353607e-06, "loss": 0.4858, "step": 4927 }, { "epoch": 0.4796107055961071, "grad_norm": 1.6675443086928328, "learning_rate": 5.57246238718327e-06, "loss": 0.5457, "step": 4928 }, { "epoch": 0.4797080291970803, "grad_norm": 1.4119889457286139, "learning_rate": 5.570896568126994e-06, "loss": 0.3692, "step": 4929 }, { "epoch": 0.47980535279805353, "grad_norm": 1.4957501288236537, "learning_rate": 5.569330692340372e-06, "loss": 0.4741, "step": 4930 }, { "epoch": 0.4799026763990268, "grad_norm": 1.4937942402565056, "learning_rate": 5.567764759979013e-06, "loss": 0.4398, "step": 4931 }, { "epoch": 0.48, "grad_norm": 1.302421978464306, "learning_rate": 5.566198771198519e-06, "loss": 0.3684, "step": 4932 }, { "epoch": 0.48009732360097324, "grad_norm": 1.4896919193126248, "learning_rate": 5.564632726154506e-06, "loss": 0.5215, "step": 4933 }, { "epoch": 0.4801946472019465, "grad_norm": 1.3684396296081907, "learning_rate": 5.5630666250025924e-06, "loss": 0.3742, "step": 4934 }, { "epoch": 0.4802919708029197, "grad_norm": 1.4541732944603456, "learning_rate": 5.5615004678984005e-06, "loss": 0.389, "step": 4935 }, { "epoch": 0.48038929440389294, "grad_norm": 1.2989944695669944, "learning_rate": 5.559934254997563e-06, "loss": 0.2933, "step": 4936 }, { "epoch": 0.4804866180048662, "grad_norm": 1.2551515758973009, "learning_rate": 5.558367986455715e-06, "loss": 0.2312, "step": 4937 }, { "epoch": 0.4805839416058394, "grad_norm": 1.6383916276486878, "learning_rate": 5.556801662428497e-06, "loss": 0.3433, "step": 4938 }, { "epoch": 0.48068126520681265, "grad_norm": 1.395987981323709, "learning_rate": 5.555235283071554e-06, "loss": 0.2205, "step": 4939 }, { "epoch": 0.4807785888077859, "grad_norm": 1.5870226074656453, "learning_rate": 5.5536688485405395e-06, "loss": 0.5398, "step": 4940 }, { "epoch": 0.4808759124087591, "grad_norm": 1.4049976282337302, "learning_rate": 5.5521023589911124e-06, "loss": 0.3912, "step": 4941 }, { "epoch": 0.48097323600973235, "grad_norm": 1.4051654448113469, "learning_rate": 5.550535814578935e-06, "loss": 0.3652, "step": 4942 }, { "epoch": 0.4810705596107056, "grad_norm": 1.3749511016473956, "learning_rate": 5.548969215459674e-06, "loss": 0.4801, "step": 4943 }, { "epoch": 0.48116788321167886, "grad_norm": 1.807326200157131, "learning_rate": 5.547402561789007e-06, "loss": 0.6021, "step": 4944 }, { "epoch": 0.48126520681265206, "grad_norm": 1.534704554452748, "learning_rate": 5.545835853722609e-06, "loss": 0.5896, "step": 4945 }, { "epoch": 0.4813625304136253, "grad_norm": 1.5090586202369751, "learning_rate": 5.544269091416165e-06, "loss": 0.3395, "step": 4946 }, { "epoch": 0.48145985401459857, "grad_norm": 1.3434490044175096, "learning_rate": 5.542702275025371e-06, "loss": 0.2682, "step": 4947 }, { "epoch": 0.48155717761557176, "grad_norm": 1.2846389239459437, "learning_rate": 5.541135404705915e-06, "loss": 0.3631, "step": 4948 }, { "epoch": 0.481654501216545, "grad_norm": 1.2126695612690586, "learning_rate": 5.5395684806135046e-06, "loss": 0.2274, "step": 4949 }, { "epoch": 0.48175182481751827, "grad_norm": 1.2719046137031083, "learning_rate": 5.538001502903839e-06, "loss": 0.3379, "step": 4950 }, { "epoch": 0.48184914841849147, "grad_norm": 1.7949003564916597, "learning_rate": 5.536434471732635e-06, "loss": 0.5321, "step": 4951 }, { "epoch": 0.4819464720194647, "grad_norm": 1.5533075390394497, "learning_rate": 5.534867387255607e-06, "loss": 0.3958, "step": 4952 }, { "epoch": 0.482043795620438, "grad_norm": 1.9088016379615553, "learning_rate": 5.533300249628479e-06, "loss": 0.4515, "step": 4953 }, { "epoch": 0.4821411192214112, "grad_norm": 3.884497294261637, "learning_rate": 5.531733059006978e-06, "loss": 0.3338, "step": 4954 }, { "epoch": 0.4822384428223844, "grad_norm": 1.9162863754930761, "learning_rate": 5.530165815546835e-06, "loss": 0.3446, "step": 4955 }, { "epoch": 0.4823357664233577, "grad_norm": 1.1899820789622602, "learning_rate": 5.528598519403788e-06, "loss": 0.3353, "step": 4956 }, { "epoch": 0.4824330900243309, "grad_norm": 1.2085002367353228, "learning_rate": 5.527031170733583e-06, "loss": 0.3172, "step": 4957 }, { "epoch": 0.48253041362530413, "grad_norm": 1.4730257334743548, "learning_rate": 5.525463769691967e-06, "loss": 0.4526, "step": 4958 }, { "epoch": 0.4826277372262774, "grad_norm": 1.5591799749760946, "learning_rate": 5.523896316434692e-06, "loss": 0.462, "step": 4959 }, { "epoch": 0.4827250608272506, "grad_norm": 2.1529087218245127, "learning_rate": 5.522328811117519e-06, "loss": 0.4195, "step": 4960 }, { "epoch": 0.48282238442822384, "grad_norm": 1.744038372737554, "learning_rate": 5.52076125389621e-06, "loss": 0.6677, "step": 4961 }, { "epoch": 0.4829197080291971, "grad_norm": 1.4795186962610711, "learning_rate": 5.5191936449265345e-06, "loss": 0.4061, "step": 4962 }, { "epoch": 0.4830170316301703, "grad_norm": 1.7915332507945234, "learning_rate": 5.517625984364269e-06, "loss": 0.5287, "step": 4963 }, { "epoch": 0.48311435523114354, "grad_norm": 1.5862892399737336, "learning_rate": 5.5160582723651905e-06, "loss": 0.3734, "step": 4964 }, { "epoch": 0.4832116788321168, "grad_norm": 1.3700896061182768, "learning_rate": 5.514490509085084e-06, "loss": 0.4018, "step": 4965 }, { "epoch": 0.48330900243309005, "grad_norm": 1.3501396848679752, "learning_rate": 5.512922694679739e-06, "loss": 0.3885, "step": 4966 }, { "epoch": 0.48340632603406325, "grad_norm": 1.3364214104388243, "learning_rate": 5.511354829304952e-06, "loss": 0.4145, "step": 4967 }, { "epoch": 0.4835036496350365, "grad_norm": 1.5072023412269178, "learning_rate": 5.509786913116521e-06, "loss": 0.4158, "step": 4968 }, { "epoch": 0.48360097323600976, "grad_norm": 1.433925746133814, "learning_rate": 5.508218946270251e-06, "loss": 0.3939, "step": 4969 }, { "epoch": 0.48369829683698295, "grad_norm": 1.3899463326227424, "learning_rate": 5.5066509289219505e-06, "loss": 0.427, "step": 4970 }, { "epoch": 0.4837956204379562, "grad_norm": 1.3961752382649586, "learning_rate": 5.505082861227437e-06, "loss": 0.3507, "step": 4971 }, { "epoch": 0.48389294403892946, "grad_norm": 1.4261266002714594, "learning_rate": 5.50351474334253e-06, "loss": 0.3776, "step": 4972 }, { "epoch": 0.48399026763990266, "grad_norm": 1.3049976642631587, "learning_rate": 5.501946575423051e-06, "loss": 0.2857, "step": 4973 }, { "epoch": 0.4840875912408759, "grad_norm": 1.7623259519308563, "learning_rate": 5.500378357624835e-06, "loss": 0.4472, "step": 4974 }, { "epoch": 0.48418491484184917, "grad_norm": 1.6750772771516935, "learning_rate": 5.498810090103712e-06, "loss": 0.3952, "step": 4975 }, { "epoch": 0.48428223844282237, "grad_norm": 1.326746820196234, "learning_rate": 5.4972417730155256e-06, "loss": 0.2186, "step": 4976 }, { "epoch": 0.4843795620437956, "grad_norm": 2.6917502926648904, "learning_rate": 5.4956734065161176e-06, "loss": 0.4336, "step": 4977 }, { "epoch": 0.4844768856447689, "grad_norm": 1.5933122220629257, "learning_rate": 5.494104990761338e-06, "loss": 0.3989, "step": 4978 }, { "epoch": 0.48457420924574207, "grad_norm": 1.562234703429822, "learning_rate": 5.492536525907042e-06, "loss": 0.28, "step": 4979 }, { "epoch": 0.4846715328467153, "grad_norm": 1.155189440165412, "learning_rate": 5.490968012109089e-06, "loss": 0.3582, "step": 4980 }, { "epoch": 0.4847688564476886, "grad_norm": 1.504468698725549, "learning_rate": 5.489399449523343e-06, "loss": 0.4868, "step": 4981 }, { "epoch": 0.4848661800486618, "grad_norm": 1.6600998094725776, "learning_rate": 5.4878308383056735e-06, "loss": 0.551, "step": 4982 }, { "epoch": 0.48496350364963503, "grad_norm": 1.6646907856061826, "learning_rate": 5.486262178611953e-06, "loss": 0.3845, "step": 4983 }, { "epoch": 0.4850608272506083, "grad_norm": 2.203031497350937, "learning_rate": 5.484693470598061e-06, "loss": 0.5805, "step": 4984 }, { "epoch": 0.4851581508515815, "grad_norm": 1.3567660936308192, "learning_rate": 5.483124714419881e-06, "loss": 0.2999, "step": 4985 }, { "epoch": 0.48525547445255474, "grad_norm": 1.2357480102807044, "learning_rate": 5.4815559102333005e-06, "loss": 0.3387, "step": 4986 }, { "epoch": 0.485352798053528, "grad_norm": 1.6859951373581095, "learning_rate": 5.479987058194214e-06, "loss": 0.4272, "step": 4987 }, { "epoch": 0.48545012165450124, "grad_norm": 1.5177938928969597, "learning_rate": 5.478418158458518e-06, "loss": 0.5521, "step": 4988 }, { "epoch": 0.48554744525547444, "grad_norm": 1.2753160346799406, "learning_rate": 5.476849211182115e-06, "loss": 0.4378, "step": 4989 }, { "epoch": 0.4856447688564477, "grad_norm": 1.3387716024100207, "learning_rate": 5.475280216520913e-06, "loss": 0.4093, "step": 4990 }, { "epoch": 0.48574209245742095, "grad_norm": 1.4017262068692473, "learning_rate": 5.473711174630826e-06, "loss": 0.3709, "step": 4991 }, { "epoch": 0.48583941605839415, "grad_norm": 1.4983315725744055, "learning_rate": 5.472142085667767e-06, "loss": 0.4051, "step": 4992 }, { "epoch": 0.4859367396593674, "grad_norm": 1.3419200057791822, "learning_rate": 5.470572949787658e-06, "loss": 0.4465, "step": 4993 }, { "epoch": 0.48603406326034065, "grad_norm": 1.0828551312926495, "learning_rate": 5.469003767146426e-06, "loss": 0.2349, "step": 4994 }, { "epoch": 0.48613138686131385, "grad_norm": 1.1408772646849228, "learning_rate": 5.4674345379e-06, "loss": 0.2653, "step": 4995 }, { "epoch": 0.4862287104622871, "grad_norm": 1.595169781345277, "learning_rate": 5.46586526220432e-06, "loss": 0.3262, "step": 4996 }, { "epoch": 0.48632603406326036, "grad_norm": 1.3117900017930924, "learning_rate": 5.4642959402153205e-06, "loss": 0.3817, "step": 4997 }, { "epoch": 0.48642335766423356, "grad_norm": 1.4679453654555827, "learning_rate": 5.462726572088949e-06, "loss": 0.3647, "step": 4998 }, { "epoch": 0.4865206812652068, "grad_norm": 1.7158286496667112, "learning_rate": 5.46115715798115e-06, "loss": 0.5639, "step": 4999 }, { "epoch": 0.48661800486618007, "grad_norm": 1.6549363594842164, "learning_rate": 5.459587698047886e-06, "loss": 0.5489, "step": 5000 }, { "epoch": 0.48671532846715326, "grad_norm": 2.1898319348857926, "learning_rate": 5.458018192445108e-06, "loss": 0.4976, "step": 5001 }, { "epoch": 0.4868126520681265, "grad_norm": 1.5974613434557499, "learning_rate": 5.45644864132878e-06, "loss": 0.3197, "step": 5002 }, { "epoch": 0.48690997566909977, "grad_norm": 1.3887355698626391, "learning_rate": 5.45487904485487e-06, "loss": 0.2057, "step": 5003 }, { "epoch": 0.48700729927007297, "grad_norm": 1.6059660129870075, "learning_rate": 5.45330940317935e-06, "loss": 0.4177, "step": 5004 }, { "epoch": 0.4871046228710462, "grad_norm": 1.5738497696839364, "learning_rate": 5.451739716458196e-06, "loss": 0.345, "step": 5005 }, { "epoch": 0.4872019464720195, "grad_norm": 1.77737970001417, "learning_rate": 5.450169984847389e-06, "loss": 0.4421, "step": 5006 }, { "epoch": 0.4872992700729927, "grad_norm": 1.7144957169695934, "learning_rate": 5.4486002085029145e-06, "loss": 0.3191, "step": 5007 }, { "epoch": 0.4873965936739659, "grad_norm": 1.3777116133883833, "learning_rate": 5.44703038758076e-06, "loss": 0.3365, "step": 5008 }, { "epoch": 0.4874939172749392, "grad_norm": 1.4604287307103112, "learning_rate": 5.445460522236923e-06, "loss": 0.4651, "step": 5009 }, { "epoch": 0.48759124087591244, "grad_norm": 1.4975005624607667, "learning_rate": 5.443890612627398e-06, "loss": 0.3582, "step": 5010 }, { "epoch": 0.48768856447688563, "grad_norm": 1.5004432341259126, "learning_rate": 5.44232065890819e-06, "loss": 0.4919, "step": 5011 }, { "epoch": 0.4877858880778589, "grad_norm": 1.7923642803829052, "learning_rate": 5.440750661235308e-06, "loss": 0.4711, "step": 5012 }, { "epoch": 0.48788321167883214, "grad_norm": 1.455676965568023, "learning_rate": 5.439180619764761e-06, "loss": 0.5366, "step": 5013 }, { "epoch": 0.48798053527980534, "grad_norm": 1.4412783847736603, "learning_rate": 5.437610534652567e-06, "loss": 0.4822, "step": 5014 }, { "epoch": 0.4880778588807786, "grad_norm": 1.4061603517567263, "learning_rate": 5.4360404060547424e-06, "loss": 0.4498, "step": 5015 }, { "epoch": 0.48817518248175185, "grad_norm": 1.2205597572059488, "learning_rate": 5.434470234127317e-06, "loss": 0.2444, "step": 5016 }, { "epoch": 0.48827250608272504, "grad_norm": 1.1571719643633969, "learning_rate": 5.432900019026316e-06, "loss": 0.372, "step": 5017 }, { "epoch": 0.4883698296836983, "grad_norm": 1.2545109416777944, "learning_rate": 5.431329760907775e-06, "loss": 0.2478, "step": 5018 }, { "epoch": 0.48846715328467155, "grad_norm": 1.58204119643864, "learning_rate": 5.429759459927731e-06, "loss": 0.7143, "step": 5019 }, { "epoch": 0.48856447688564475, "grad_norm": 1.380213477167105, "learning_rate": 5.428189116242224e-06, "loss": 0.2741, "step": 5020 }, { "epoch": 0.488661800486618, "grad_norm": 1.208192532107823, "learning_rate": 5.426618730007303e-06, "loss": 0.3596, "step": 5021 }, { "epoch": 0.48875912408759126, "grad_norm": 1.4361021460116605, "learning_rate": 5.4250483013790146e-06, "loss": 0.5025, "step": 5022 }, { "epoch": 0.48885644768856445, "grad_norm": 1.1517878856617345, "learning_rate": 5.423477830513416e-06, "loss": 0.2839, "step": 5023 }, { "epoch": 0.4889537712895377, "grad_norm": 1.7188803883425718, "learning_rate": 5.421907317566566e-06, "loss": 0.6908, "step": 5024 }, { "epoch": 0.48905109489051096, "grad_norm": 1.4039101950325197, "learning_rate": 5.420336762694524e-06, "loss": 0.4987, "step": 5025 }, { "epoch": 0.48914841849148416, "grad_norm": 1.0076942078711917, "learning_rate": 5.418766166053362e-06, "loss": 0.181, "step": 5026 }, { "epoch": 0.4892457420924574, "grad_norm": 1.3958603965451082, "learning_rate": 5.4171955277991484e-06, "loss": 0.4415, "step": 5027 }, { "epoch": 0.48934306569343067, "grad_norm": 1.4424291769852293, "learning_rate": 5.415624848087959e-06, "loss": 0.3817, "step": 5028 }, { "epoch": 0.48944038929440387, "grad_norm": 1.685222430956794, "learning_rate": 5.414054127075872e-06, "loss": 0.4739, "step": 5029 }, { "epoch": 0.4895377128953771, "grad_norm": 1.674234312041242, "learning_rate": 5.412483364918972e-06, "loss": 0.481, "step": 5030 }, { "epoch": 0.4896350364963504, "grad_norm": 1.530713201359312, "learning_rate": 5.410912561773346e-06, "loss": 0.1731, "step": 5031 }, { "epoch": 0.4897323600973236, "grad_norm": 1.7243115696123834, "learning_rate": 5.409341717795088e-06, "loss": 0.6449, "step": 5032 }, { "epoch": 0.4898296836982968, "grad_norm": 1.197138021866118, "learning_rate": 5.40777083314029e-06, "loss": 0.2843, "step": 5033 }, { "epoch": 0.4899270072992701, "grad_norm": 1.4747319252677107, "learning_rate": 5.406199907965055e-06, "loss": 0.4054, "step": 5034 }, { "epoch": 0.49002433090024333, "grad_norm": 1.2706173989537235, "learning_rate": 5.404628942425485e-06, "loss": 0.3084, "step": 5035 }, { "epoch": 0.49012165450121653, "grad_norm": 1.150549782071767, "learning_rate": 5.403057936677688e-06, "loss": 0.3388, "step": 5036 }, { "epoch": 0.4902189781021898, "grad_norm": 1.524267430312653, "learning_rate": 5.401486890877777e-06, "loss": 0.515, "step": 5037 }, { "epoch": 0.49031630170316304, "grad_norm": 1.5902620932052205, "learning_rate": 5.399915805181866e-06, "loss": 0.4287, "step": 5038 }, { "epoch": 0.49041362530413624, "grad_norm": 1.2788113903176752, "learning_rate": 5.398344679746077e-06, "loss": 0.3266, "step": 5039 }, { "epoch": 0.4905109489051095, "grad_norm": 1.5081258655708445, "learning_rate": 5.39677351472653e-06, "loss": 0.4176, "step": 5040 }, { "epoch": 0.49060827250608274, "grad_norm": 1.8789293442415165, "learning_rate": 5.395202310279356e-06, "loss": 0.3402, "step": 5041 }, { "epoch": 0.49070559610705594, "grad_norm": 1.7754391033071233, "learning_rate": 5.393631066560685e-06, "loss": 0.5186, "step": 5042 }, { "epoch": 0.4908029197080292, "grad_norm": 1.4392182405674039, "learning_rate": 5.392059783726655e-06, "loss": 0.3767, "step": 5043 }, { "epoch": 0.49090024330900245, "grad_norm": 1.284098552272941, "learning_rate": 5.3904884619334005e-06, "loss": 0.2681, "step": 5044 }, { "epoch": 0.49099756690997565, "grad_norm": 1.7355035083134127, "learning_rate": 5.38891710133707e-06, "loss": 0.4057, "step": 5045 }, { "epoch": 0.4910948905109489, "grad_norm": 1.4529751115834646, "learning_rate": 5.387345702093807e-06, "loss": 0.3407, "step": 5046 }, { "epoch": 0.49119221411192215, "grad_norm": 1.6764104057784581, "learning_rate": 5.385774264359763e-06, "loss": 0.4085, "step": 5047 }, { "epoch": 0.49128953771289535, "grad_norm": 2.040083005001846, "learning_rate": 5.384202788291095e-06, "loss": 0.2671, "step": 5048 }, { "epoch": 0.4913868613138686, "grad_norm": 1.4438382439666158, "learning_rate": 5.382631274043958e-06, "loss": 0.4988, "step": 5049 }, { "epoch": 0.49148418491484186, "grad_norm": 1.3415722610399325, "learning_rate": 5.3810597217745175e-06, "loss": 0.3087, "step": 5050 }, { "epoch": 0.49158150851581506, "grad_norm": 1.7837742103051073, "learning_rate": 5.379488131638937e-06, "loss": 0.7146, "step": 5051 }, { "epoch": 0.4916788321167883, "grad_norm": 1.6439537792298233, "learning_rate": 5.377916503793388e-06, "loss": 0.4824, "step": 5052 }, { "epoch": 0.49177615571776157, "grad_norm": 1.416325902159072, "learning_rate": 5.376344838394043e-06, "loss": 0.4164, "step": 5053 }, { "epoch": 0.4918734793187348, "grad_norm": 2.45441320332752, "learning_rate": 5.374773135597081e-06, "loss": 0.2579, "step": 5054 }, { "epoch": 0.491970802919708, "grad_norm": 1.91651852353104, "learning_rate": 5.373201395558684e-06, "loss": 0.2235, "step": 5055 }, { "epoch": 0.49206812652068127, "grad_norm": 1.515787332237694, "learning_rate": 5.371629618435031e-06, "loss": 0.5142, "step": 5056 }, { "epoch": 0.4921654501216545, "grad_norm": 1.6655105952324112, "learning_rate": 5.370057804382317e-06, "loss": 0.3892, "step": 5057 }, { "epoch": 0.4922627737226277, "grad_norm": 1.5378211577647913, "learning_rate": 5.36848595355673e-06, "loss": 0.5481, "step": 5058 }, { "epoch": 0.492360097323601, "grad_norm": 1.58946159699911, "learning_rate": 5.366914066114469e-06, "loss": 0.4617, "step": 5059 }, { "epoch": 0.49245742092457423, "grad_norm": 1.3627819469502345, "learning_rate": 5.36534214221173e-06, "loss": 0.3906, "step": 5060 }, { "epoch": 0.4925547445255474, "grad_norm": 1.1795425831286543, "learning_rate": 5.36377018200472e-06, "loss": 0.2812, "step": 5061 }, { "epoch": 0.4926520681265207, "grad_norm": 1.404350778449777, "learning_rate": 5.362198185649642e-06, "loss": 0.4139, "step": 5062 }, { "epoch": 0.49274939172749394, "grad_norm": 1.3558242727366516, "learning_rate": 5.360626153302707e-06, "loss": 0.2076, "step": 5063 }, { "epoch": 0.49284671532846713, "grad_norm": 1.544055179593263, "learning_rate": 5.359054085120131e-06, "loss": 0.3684, "step": 5064 }, { "epoch": 0.4929440389294404, "grad_norm": 1.2038715077807927, "learning_rate": 5.357481981258129e-06, "loss": 0.2324, "step": 5065 }, { "epoch": 0.49304136253041364, "grad_norm": 1.6667516766166808, "learning_rate": 5.355909841872924e-06, "loss": 0.5443, "step": 5066 }, { "epoch": 0.49313868613138684, "grad_norm": 1.7590657831440246, "learning_rate": 5.354337667120737e-06, "loss": 0.7178, "step": 5067 }, { "epoch": 0.4932360097323601, "grad_norm": 1.771383028170083, "learning_rate": 5.352765457157799e-06, "loss": 0.4871, "step": 5068 }, { "epoch": 0.49333333333333335, "grad_norm": 1.305854229998231, "learning_rate": 5.351193212140341e-06, "loss": 0.28, "step": 5069 }, { "epoch": 0.49343065693430654, "grad_norm": 1.6390936834898198, "learning_rate": 5.349620932224598e-06, "loss": 0.4043, "step": 5070 }, { "epoch": 0.4935279805352798, "grad_norm": 1.3421541540690891, "learning_rate": 5.348048617566808e-06, "loss": 0.3224, "step": 5071 }, { "epoch": 0.49362530413625305, "grad_norm": 1.397997994859428, "learning_rate": 5.346476268323213e-06, "loss": 0.4431, "step": 5072 }, { "epoch": 0.4937226277372263, "grad_norm": 1.6316665205340113, "learning_rate": 5.3449038846500575e-06, "loss": 0.3536, "step": 5073 }, { "epoch": 0.4938199513381995, "grad_norm": 1.5267178162636428, "learning_rate": 5.343331466703592e-06, "loss": 0.4414, "step": 5074 }, { "epoch": 0.49391727493917276, "grad_norm": 1.7290466527386708, "learning_rate": 5.341759014640068e-06, "loss": 0.583, "step": 5075 }, { "epoch": 0.494014598540146, "grad_norm": 1.4809204861753724, "learning_rate": 5.340186528615738e-06, "loss": 0.4611, "step": 5076 }, { "epoch": 0.4941119221411192, "grad_norm": 1.7473791115003812, "learning_rate": 5.3386140087868665e-06, "loss": 0.3554, "step": 5077 }, { "epoch": 0.49420924574209246, "grad_norm": 1.5159704070411184, "learning_rate": 5.337041455309712e-06, "loss": 0.482, "step": 5078 }, { "epoch": 0.4943065693430657, "grad_norm": 1.3566136430802587, "learning_rate": 5.3354688683405396e-06, "loss": 0.2888, "step": 5079 }, { "epoch": 0.4944038929440389, "grad_norm": 1.452230034326972, "learning_rate": 5.33389624803562e-06, "loss": 0.3318, "step": 5080 }, { "epoch": 0.49450121654501217, "grad_norm": 1.4110866869083454, "learning_rate": 5.332323594551227e-06, "loss": 0.4727, "step": 5081 }, { "epoch": 0.4945985401459854, "grad_norm": 1.3293925351527427, "learning_rate": 5.3307509080436324e-06, "loss": 0.3068, "step": 5082 }, { "epoch": 0.4946958637469586, "grad_norm": 1.743561480869901, "learning_rate": 5.329178188669118e-06, "loss": 0.3913, "step": 5083 }, { "epoch": 0.4947931873479319, "grad_norm": 1.213242730034601, "learning_rate": 5.3276054365839626e-06, "loss": 0.2869, "step": 5084 }, { "epoch": 0.4948905109489051, "grad_norm": 1.3356450078569697, "learning_rate": 5.326032651944454e-06, "loss": 0.3197, "step": 5085 }, { "epoch": 0.4949878345498783, "grad_norm": 1.5030506424941168, "learning_rate": 5.324459834906882e-06, "loss": 0.3483, "step": 5086 }, { "epoch": 0.4950851581508516, "grad_norm": 1.7122767532626826, "learning_rate": 5.322886985627535e-06, "loss": 0.3508, "step": 5087 }, { "epoch": 0.49518248175182483, "grad_norm": 1.5107896892277575, "learning_rate": 5.321314104262711e-06, "loss": 0.4561, "step": 5088 }, { "epoch": 0.49527980535279803, "grad_norm": 1.3812632529887294, "learning_rate": 5.319741190968706e-06, "loss": 0.4924, "step": 5089 }, { "epoch": 0.4953771289537713, "grad_norm": 1.5944586281730757, "learning_rate": 5.318168245901823e-06, "loss": 0.3154, "step": 5090 }, { "epoch": 0.49547445255474454, "grad_norm": 1.5509847127971457, "learning_rate": 5.316595269218367e-06, "loss": 0.4957, "step": 5091 }, { "epoch": 0.49557177615571774, "grad_norm": 1.3107801704434123, "learning_rate": 5.315022261074642e-06, "loss": 0.4174, "step": 5092 }, { "epoch": 0.495669099756691, "grad_norm": 1.563988853649021, "learning_rate": 5.313449221626965e-06, "loss": 0.5301, "step": 5093 }, { "epoch": 0.49576642335766424, "grad_norm": 1.2459488069907703, "learning_rate": 5.311876151031642e-06, "loss": 0.2666, "step": 5094 }, { "epoch": 0.4958637469586375, "grad_norm": 1.7150798105069895, "learning_rate": 5.310303049444996e-06, "loss": 0.518, "step": 5095 }, { "epoch": 0.4959610705596107, "grad_norm": 1.490026056970703, "learning_rate": 5.308729917023346e-06, "loss": 0.2927, "step": 5096 }, { "epoch": 0.49605839416058395, "grad_norm": 1.412429878722388, "learning_rate": 5.307156753923014e-06, "loss": 0.3318, "step": 5097 }, { "epoch": 0.4961557177615572, "grad_norm": 1.5612364428308012, "learning_rate": 5.305583560300325e-06, "loss": 0.4288, "step": 5098 }, { "epoch": 0.4962530413625304, "grad_norm": 1.7470922836534188, "learning_rate": 5.304010336311611e-06, "loss": 0.3495, "step": 5099 }, { "epoch": 0.49635036496350365, "grad_norm": 1.4355178103456536, "learning_rate": 5.302437082113203e-06, "loss": 0.4107, "step": 5100 }, { "epoch": 0.4964476885644769, "grad_norm": 1.3910614357496167, "learning_rate": 5.300863797861436e-06, "loss": 0.3276, "step": 5101 }, { "epoch": 0.4965450121654501, "grad_norm": 1.369456647896358, "learning_rate": 5.29929048371265e-06, "loss": 0.3209, "step": 5102 }, { "epoch": 0.49664233576642336, "grad_norm": 1.3194231276993127, "learning_rate": 5.297717139823183e-06, "loss": 0.2718, "step": 5103 }, { "epoch": 0.4967396593673966, "grad_norm": 1.5736389606985905, "learning_rate": 5.2961437663493805e-06, "loss": 0.4813, "step": 5104 }, { "epoch": 0.4968369829683698, "grad_norm": 1.1553842551060758, "learning_rate": 5.294570363447589e-06, "loss": 0.3266, "step": 5105 }, { "epoch": 0.49693430656934306, "grad_norm": 1.586373555787601, "learning_rate": 5.2929969312741625e-06, "loss": 0.5386, "step": 5106 }, { "epoch": 0.4970316301703163, "grad_norm": 1.8884361774869836, "learning_rate": 5.291423469985449e-06, "loss": 0.5736, "step": 5107 }, { "epoch": 0.4971289537712895, "grad_norm": 1.5461639928065092, "learning_rate": 5.289849979737808e-06, "loss": 0.5066, "step": 5108 }, { "epoch": 0.49722627737226277, "grad_norm": 1.1938311304614138, "learning_rate": 5.288276460687595e-06, "loss": 0.3225, "step": 5109 }, { "epoch": 0.497323600973236, "grad_norm": 1.5838662137551112, "learning_rate": 5.286702912991172e-06, "loss": 0.4726, "step": 5110 }, { "epoch": 0.4974209245742092, "grad_norm": 1.6421001523613388, "learning_rate": 5.285129336804905e-06, "loss": 0.4279, "step": 5111 }, { "epoch": 0.4975182481751825, "grad_norm": 1.965461770271289, "learning_rate": 5.283555732285161e-06, "loss": 0.3888, "step": 5112 }, { "epoch": 0.49761557177615573, "grad_norm": 1.408633437525859, "learning_rate": 5.28198209958831e-06, "loss": 0.3822, "step": 5113 }, { "epoch": 0.4977128953771289, "grad_norm": 1.3523571346508707, "learning_rate": 5.280408438870723e-06, "loss": 0.3911, "step": 5114 }, { "epoch": 0.4978102189781022, "grad_norm": 1.2093674849244664, "learning_rate": 5.2788347502887775e-06, "loss": 0.2573, "step": 5115 }, { "epoch": 0.49790754257907544, "grad_norm": 1.5253078618962868, "learning_rate": 5.277261033998852e-06, "loss": 0.3471, "step": 5116 }, { "epoch": 0.4980048661800487, "grad_norm": 1.5897083271657746, "learning_rate": 5.2756872901573275e-06, "loss": 0.4199, "step": 5117 }, { "epoch": 0.4981021897810219, "grad_norm": 2.0022642907947117, "learning_rate": 5.274113518920586e-06, "loss": 0.3263, "step": 5118 }, { "epoch": 0.49819951338199514, "grad_norm": 1.6320439770514106, "learning_rate": 5.272539720445017e-06, "loss": 0.5367, "step": 5119 }, { "epoch": 0.4982968369829684, "grad_norm": 2.330014786734751, "learning_rate": 5.270965894887008e-06, "loss": 0.4306, "step": 5120 }, { "epoch": 0.4983941605839416, "grad_norm": 1.47016643645919, "learning_rate": 5.269392042402951e-06, "loss": 0.4917, "step": 5121 }, { "epoch": 0.49849148418491485, "grad_norm": 1.239272241530952, "learning_rate": 5.267818163149242e-06, "loss": 0.2625, "step": 5122 }, { "epoch": 0.4985888077858881, "grad_norm": 1.249212780068013, "learning_rate": 5.266244257282277e-06, "loss": 0.2373, "step": 5123 }, { "epoch": 0.4986861313868613, "grad_norm": 1.2338533868729509, "learning_rate": 5.264670324958458e-06, "loss": 0.3284, "step": 5124 }, { "epoch": 0.49878345498783455, "grad_norm": 1.4536950418673635, "learning_rate": 5.2630963663341835e-06, "loss": 0.3611, "step": 5125 }, { "epoch": 0.4988807785888078, "grad_norm": 1.849642695899207, "learning_rate": 5.261522381565863e-06, "loss": 0.432, "step": 5126 }, { "epoch": 0.498978102189781, "grad_norm": 1.1337195612513011, "learning_rate": 5.259948370809902e-06, "loss": 0.2563, "step": 5127 }, { "epoch": 0.49907542579075426, "grad_norm": 1.9535786826596058, "learning_rate": 5.258374334222712e-06, "loss": 0.4362, "step": 5128 }, { "epoch": 0.4991727493917275, "grad_norm": 1.5933493316251561, "learning_rate": 5.256800271960707e-06, "loss": 0.264, "step": 5129 }, { "epoch": 0.4992700729927007, "grad_norm": 1.7529896675410312, "learning_rate": 5.2552261841803e-06, "loss": 0.3878, "step": 5130 }, { "epoch": 0.49936739659367396, "grad_norm": 1.3258193253920645, "learning_rate": 5.2536520710379095e-06, "loss": 0.3274, "step": 5131 }, { "epoch": 0.4994647201946472, "grad_norm": 1.4931087346426537, "learning_rate": 5.252077932689956e-06, "loss": 0.3069, "step": 5132 }, { "epoch": 0.4995620437956204, "grad_norm": 1.3760062783730125, "learning_rate": 5.2505037692928654e-06, "loss": 0.3101, "step": 5133 }, { "epoch": 0.49965936739659367, "grad_norm": 1.564688027947308, "learning_rate": 5.248929581003061e-06, "loss": 0.327, "step": 5134 }, { "epoch": 0.4997566909975669, "grad_norm": 1.4480734358338416, "learning_rate": 5.247355367976971e-06, "loss": 0.3627, "step": 5135 }, { "epoch": 0.4998540145985401, "grad_norm": 1.2929757570382263, "learning_rate": 5.245781130371025e-06, "loss": 0.3447, "step": 5136 }, { "epoch": 0.4999513381995134, "grad_norm": 0.9760162656906078, "learning_rate": 5.244206868341657e-06, "loss": 0.1752, "step": 5137 }, { "epoch": 0.5000486618004866, "grad_norm": 1.3141856948855382, "learning_rate": 5.242632582045304e-06, "loss": 0.3896, "step": 5138 }, { "epoch": 0.5001459854014598, "grad_norm": 1.3324418763443528, "learning_rate": 5.241058271638401e-06, "loss": 0.3976, "step": 5139 }, { "epoch": 0.5002433090024331, "grad_norm": 1.2800905517496453, "learning_rate": 5.23948393727739e-06, "loss": 0.2589, "step": 5140 }, { "epoch": 0.5003406326034063, "grad_norm": 1.5624023406980514, "learning_rate": 5.237909579118713e-06, "loss": 0.2955, "step": 5141 }, { "epoch": 0.5004379562043796, "grad_norm": 1.2296745037860775, "learning_rate": 5.236335197318814e-06, "loss": 0.3862, "step": 5142 }, { "epoch": 0.5005352798053528, "grad_norm": 1.4387826587407209, "learning_rate": 5.23476079203414e-06, "loss": 0.4948, "step": 5143 }, { "epoch": 0.500632603406326, "grad_norm": 1.4881502028713878, "learning_rate": 5.2331863634211455e-06, "loss": 0.3596, "step": 5144 }, { "epoch": 0.5007299270072992, "grad_norm": 1.5866263260775346, "learning_rate": 5.2316119116362765e-06, "loss": 0.4472, "step": 5145 }, { "epoch": 0.5008272506082725, "grad_norm": 1.3412917962010176, "learning_rate": 5.23003743683599e-06, "loss": 0.4589, "step": 5146 }, { "epoch": 0.5009245742092457, "grad_norm": 1.3788291758320874, "learning_rate": 5.2284629391767405e-06, "loss": 0.474, "step": 5147 }, { "epoch": 0.501021897810219, "grad_norm": 1.9449824164216007, "learning_rate": 5.22688841881499e-06, "loss": 0.2456, "step": 5148 }, { "epoch": 0.5011192214111923, "grad_norm": 1.538030287931762, "learning_rate": 5.225313875907198e-06, "loss": 0.4345, "step": 5149 }, { "epoch": 0.5012165450121655, "grad_norm": 1.184014727065925, "learning_rate": 5.223739310609827e-06, "loss": 0.2862, "step": 5150 }, { "epoch": 0.5013138686131386, "grad_norm": 1.4929447349800702, "learning_rate": 5.222164723079344e-06, "loss": 0.571, "step": 5151 }, { "epoch": 0.5014111922141119, "grad_norm": 1.007335106203281, "learning_rate": 5.220590113472214e-06, "loss": 0.236, "step": 5152 }, { "epoch": 0.5015085158150852, "grad_norm": 1.106142142258815, "learning_rate": 5.21901548194491e-06, "loss": 0.264, "step": 5153 }, { "epoch": 0.5016058394160584, "grad_norm": 1.1532992264954316, "learning_rate": 5.217440828653902e-06, "loss": 0.1964, "step": 5154 }, { "epoch": 0.5017031630170317, "grad_norm": 1.4016312087960912, "learning_rate": 5.215866153755667e-06, "loss": 0.3007, "step": 5155 }, { "epoch": 0.5018004866180049, "grad_norm": 1.4902958088582248, "learning_rate": 5.214291457406679e-06, "loss": 0.6606, "step": 5156 }, { "epoch": 0.5018978102189781, "grad_norm": 1.800707966602163, "learning_rate": 5.212716739763417e-06, "loss": 0.2991, "step": 5157 }, { "epoch": 0.5019951338199513, "grad_norm": 1.1795829069746984, "learning_rate": 5.211142000982361e-06, "loss": 0.232, "step": 5158 }, { "epoch": 0.5020924574209246, "grad_norm": 1.3363897439309071, "learning_rate": 5.209567241219995e-06, "loss": 0.3185, "step": 5159 }, { "epoch": 0.5021897810218978, "grad_norm": 1.4984272022708915, "learning_rate": 5.207992460632805e-06, "loss": 0.4622, "step": 5160 }, { "epoch": 0.5022871046228711, "grad_norm": 1.6346673240867733, "learning_rate": 5.206417659377274e-06, "loss": 0.6491, "step": 5161 }, { "epoch": 0.5023844282238443, "grad_norm": 1.4843904193278086, "learning_rate": 5.204842837609896e-06, "loss": 0.531, "step": 5162 }, { "epoch": 0.5024817518248175, "grad_norm": 1.4928961863467327, "learning_rate": 5.203267995487156e-06, "loss": 0.4246, "step": 5163 }, { "epoch": 0.5025790754257907, "grad_norm": 1.403113831133665, "learning_rate": 5.201693133165553e-06, "loss": 0.3214, "step": 5164 }, { "epoch": 0.502676399026764, "grad_norm": 1.418220182463448, "learning_rate": 5.200118250801579e-06, "loss": 0.3571, "step": 5165 }, { "epoch": 0.5027737226277372, "grad_norm": 1.541211172637322, "learning_rate": 5.19854334855173e-06, "loss": 0.4525, "step": 5166 }, { "epoch": 0.5028710462287105, "grad_norm": 1.479374705267467, "learning_rate": 5.196968426572509e-06, "loss": 0.4558, "step": 5167 }, { "epoch": 0.5029683698296837, "grad_norm": 1.3299527326229879, "learning_rate": 5.19539348502041e-06, "loss": 0.3289, "step": 5168 }, { "epoch": 0.5030656934306569, "grad_norm": 1.4899737328895861, "learning_rate": 5.193818524051944e-06, "loss": 0.3758, "step": 5169 }, { "epoch": 0.5031630170316301, "grad_norm": 1.6762585082609824, "learning_rate": 5.192243543823611e-06, "loss": 0.4284, "step": 5170 }, { "epoch": 0.5032603406326034, "grad_norm": 1.3559478880340818, "learning_rate": 5.190668544491919e-06, "loss": 0.2668, "step": 5171 }, { "epoch": 0.5033576642335766, "grad_norm": 1.379702459753202, "learning_rate": 5.1890935262133765e-06, "loss": 0.3795, "step": 5172 }, { "epoch": 0.5034549878345499, "grad_norm": 1.2992525941589073, "learning_rate": 5.187518489144494e-06, "loss": 0.2682, "step": 5173 }, { "epoch": 0.5035523114355231, "grad_norm": 1.2960847952029224, "learning_rate": 5.1859434334417845e-06, "loss": 0.3201, "step": 5174 }, { "epoch": 0.5036496350364964, "grad_norm": 1.5377677467251165, "learning_rate": 5.184368359261761e-06, "loss": 0.2732, "step": 5175 }, { "epoch": 0.5037469586374695, "grad_norm": 1.2077602419583777, "learning_rate": 5.182793266760942e-06, "loss": 0.3052, "step": 5176 }, { "epoch": 0.5038442822384428, "grad_norm": 1.3311778268368157, "learning_rate": 5.181218156095842e-06, "loss": 0.3701, "step": 5177 }, { "epoch": 0.503941605839416, "grad_norm": 1.6803458409604715, "learning_rate": 5.179643027422983e-06, "loss": 0.6306, "step": 5178 }, { "epoch": 0.5040389294403893, "grad_norm": 1.7291322114874463, "learning_rate": 5.178067880898884e-06, "loss": 0.4439, "step": 5179 }, { "epoch": 0.5041362530413626, "grad_norm": 1.3614150776177938, "learning_rate": 5.176492716680072e-06, "loss": 0.3516, "step": 5180 }, { "epoch": 0.5042335766423358, "grad_norm": 1.4597611204449916, "learning_rate": 5.174917534923071e-06, "loss": 0.5477, "step": 5181 }, { "epoch": 0.504330900243309, "grad_norm": 1.192848258438179, "learning_rate": 5.173342335784407e-06, "loss": 0.3049, "step": 5182 }, { "epoch": 0.5044282238442822, "grad_norm": 1.3529121407700522, "learning_rate": 5.171767119420609e-06, "loss": 0.3515, "step": 5183 }, { "epoch": 0.5045255474452555, "grad_norm": 1.3189405276532165, "learning_rate": 5.170191885988204e-06, "loss": 0.303, "step": 5184 }, { "epoch": 0.5046228710462287, "grad_norm": 1.3395134432503608, "learning_rate": 5.168616635643728e-06, "loss": 0.4379, "step": 5185 }, { "epoch": 0.504720194647202, "grad_norm": 1.4687489448705902, "learning_rate": 5.167041368543714e-06, "loss": 0.3411, "step": 5186 }, { "epoch": 0.5048175182481752, "grad_norm": 1.4648007806197731, "learning_rate": 5.165466084844697e-06, "loss": 0.4563, "step": 5187 }, { "epoch": 0.5049148418491484, "grad_norm": 1.3408152447498458, "learning_rate": 5.163890784703211e-06, "loss": 0.3668, "step": 5188 }, { "epoch": 0.5050121654501216, "grad_norm": 1.2218158502725498, "learning_rate": 5.1623154682757985e-06, "loss": 0.3419, "step": 5189 }, { "epoch": 0.5051094890510949, "grad_norm": 1.6401215280304642, "learning_rate": 5.160740135718998e-06, "loss": 0.481, "step": 5190 }, { "epoch": 0.5052068126520681, "grad_norm": 1.4872384811163053, "learning_rate": 5.1591647871893525e-06, "loss": 0.5492, "step": 5191 }, { "epoch": 0.5053041362530414, "grad_norm": 1.4379726959575616, "learning_rate": 5.157589422843405e-06, "loss": 0.4766, "step": 5192 }, { "epoch": 0.5054014598540146, "grad_norm": 1.5206530041669701, "learning_rate": 5.156014042837696e-06, "loss": 0.6581, "step": 5193 }, { "epoch": 0.5054987834549879, "grad_norm": 1.395265871915643, "learning_rate": 5.154438647328778e-06, "loss": 0.4679, "step": 5194 }, { "epoch": 0.505596107055961, "grad_norm": 1.684967401115602, "learning_rate": 5.152863236473195e-06, "loss": 0.5604, "step": 5195 }, { "epoch": 0.5056934306569343, "grad_norm": 1.3288036632018836, "learning_rate": 5.151287810427501e-06, "loss": 0.3682, "step": 5196 }, { "epoch": 0.5057907542579075, "grad_norm": 1.5564774986574346, "learning_rate": 5.1497123693482435e-06, "loss": 0.3276, "step": 5197 }, { "epoch": 0.5058880778588808, "grad_norm": 1.4388292220880394, "learning_rate": 5.148136913391976e-06, "loss": 0.4782, "step": 5198 }, { "epoch": 0.505985401459854, "grad_norm": 1.1216554729777923, "learning_rate": 5.1465614427152495e-06, "loss": 0.3055, "step": 5199 }, { "epoch": 0.5060827250608273, "grad_norm": 1.3681833182810281, "learning_rate": 5.144985957474625e-06, "loss": 0.5088, "step": 5200 }, { "epoch": 0.5061800486618004, "grad_norm": 1.4227728052800053, "learning_rate": 5.1434104578266575e-06, "loss": 0.49, "step": 5201 }, { "epoch": 0.5062773722627737, "grad_norm": 1.5583998480624413, "learning_rate": 5.1418349439279024e-06, "loss": 0.458, "step": 5202 }, { "epoch": 0.506374695863747, "grad_norm": 1.5438214613710497, "learning_rate": 5.140259415934924e-06, "loss": 0.4232, "step": 5203 }, { "epoch": 0.5064720194647202, "grad_norm": 1.4731204164002378, "learning_rate": 5.1386838740042786e-06, "loss": 0.3355, "step": 5204 }, { "epoch": 0.5065693430656935, "grad_norm": 1.4701803020269792, "learning_rate": 5.137108318292533e-06, "loss": 0.3414, "step": 5205 }, { "epoch": 0.5066666666666667, "grad_norm": 1.5475226693657602, "learning_rate": 5.135532748956249e-06, "loss": 0.447, "step": 5206 }, { "epoch": 0.5067639902676399, "grad_norm": 1.5093316050228651, "learning_rate": 5.1339571661519934e-06, "loss": 0.4464, "step": 5207 }, { "epoch": 0.5068613138686131, "grad_norm": 1.471389509018518, "learning_rate": 5.132381570036331e-06, "loss": 0.4475, "step": 5208 }, { "epoch": 0.5069586374695864, "grad_norm": 1.1213425416723295, "learning_rate": 5.130805960765831e-06, "loss": 0.2829, "step": 5209 }, { "epoch": 0.5070559610705596, "grad_norm": 1.6683753632476268, "learning_rate": 5.129230338497062e-06, "loss": 0.7265, "step": 5210 }, { "epoch": 0.5071532846715329, "grad_norm": 1.3873827996457901, "learning_rate": 5.127654703386596e-06, "loss": 0.3492, "step": 5211 }, { "epoch": 0.5072506082725061, "grad_norm": 1.7396379641179833, "learning_rate": 5.126079055591002e-06, "loss": 0.4454, "step": 5212 }, { "epoch": 0.5073479318734794, "grad_norm": 1.3220037880062712, "learning_rate": 5.1245033952668556e-06, "loss": 0.3256, "step": 5213 }, { "epoch": 0.5074452554744525, "grad_norm": 1.6095446154646171, "learning_rate": 5.122927722570731e-06, "loss": 0.5942, "step": 5214 }, { "epoch": 0.5075425790754258, "grad_norm": 1.3887406344231006, "learning_rate": 5.121352037659201e-06, "loss": 0.4056, "step": 5215 }, { "epoch": 0.507639902676399, "grad_norm": 1.7774251730576711, "learning_rate": 5.119776340688846e-06, "loss": 0.3668, "step": 5216 }, { "epoch": 0.5077372262773723, "grad_norm": 1.3880009700458447, "learning_rate": 5.118200631816241e-06, "loss": 0.4426, "step": 5217 }, { "epoch": 0.5078345498783455, "grad_norm": 1.1618270402831925, "learning_rate": 5.116624911197968e-06, "loss": 0.3173, "step": 5218 }, { "epoch": 0.5079318734793188, "grad_norm": 1.3052971608998365, "learning_rate": 5.115049178990606e-06, "loss": 0.3429, "step": 5219 }, { "epoch": 0.5080291970802919, "grad_norm": 1.395739889101101, "learning_rate": 5.113473435350736e-06, "loss": 0.2611, "step": 5220 }, { "epoch": 0.5081265206812652, "grad_norm": 1.4207574833436742, "learning_rate": 5.11189768043494e-06, "loss": 0.3232, "step": 5221 }, { "epoch": 0.5082238442822384, "grad_norm": 1.5056864717126979, "learning_rate": 5.110321914399803e-06, "loss": 0.5251, "step": 5222 }, { "epoch": 0.5083211678832117, "grad_norm": 1.3428681607214714, "learning_rate": 5.108746137401911e-06, "loss": 0.4092, "step": 5223 }, { "epoch": 0.5084184914841849, "grad_norm": 1.4359802837452804, "learning_rate": 5.107170349597847e-06, "loss": 0.3664, "step": 5224 }, { "epoch": 0.5085158150851582, "grad_norm": 1.1453923446073062, "learning_rate": 5.105594551144201e-06, "loss": 0.3425, "step": 5225 }, { "epoch": 0.5086131386861313, "grad_norm": 1.378996935231743, "learning_rate": 5.104018742197557e-06, "loss": 0.2565, "step": 5226 }, { "epoch": 0.5087104622871046, "grad_norm": 1.3853043771234994, "learning_rate": 5.1024429229145086e-06, "loss": 0.3338, "step": 5227 }, { "epoch": 0.5088077858880778, "grad_norm": 1.0898316085303188, "learning_rate": 5.1008670934516444e-06, "loss": 0.2411, "step": 5228 }, { "epoch": 0.5089051094890511, "grad_norm": 1.3490666624583345, "learning_rate": 5.099291253965554e-06, "loss": 0.3622, "step": 5229 }, { "epoch": 0.5090024330900244, "grad_norm": 1.4649127587911646, "learning_rate": 5.097715404612832e-06, "loss": 0.5378, "step": 5230 }, { "epoch": 0.5090997566909976, "grad_norm": 2.5116790720889153, "learning_rate": 5.096139545550068e-06, "loss": 0.4589, "step": 5231 }, { "epoch": 0.5091970802919707, "grad_norm": 1.2142760492789682, "learning_rate": 5.094563676933859e-06, "loss": 0.1932, "step": 5232 }, { "epoch": 0.509294403892944, "grad_norm": 1.485833425316814, "learning_rate": 5.0929877989207995e-06, "loss": 0.5478, "step": 5233 }, { "epoch": 0.5093917274939173, "grad_norm": 1.717694044853761, "learning_rate": 5.091411911667486e-06, "loss": 0.5881, "step": 5234 }, { "epoch": 0.5094890510948905, "grad_norm": 1.4710499226690918, "learning_rate": 5.089836015330514e-06, "loss": 0.497, "step": 5235 }, { "epoch": 0.5095863746958638, "grad_norm": 1.5055591566004431, "learning_rate": 5.088260110066483e-06, "loss": 0.4424, "step": 5236 }, { "epoch": 0.509683698296837, "grad_norm": 1.5383315311116867, "learning_rate": 5.086684196031989e-06, "loss": 0.4836, "step": 5237 }, { "epoch": 0.5097810218978103, "grad_norm": 1.5059324412548862, "learning_rate": 5.0851082733836336e-06, "loss": 0.3933, "step": 5238 }, { "epoch": 0.5098783454987834, "grad_norm": 1.3820898031766242, "learning_rate": 5.083532342278018e-06, "loss": 0.3332, "step": 5239 }, { "epoch": 0.5099756690997567, "grad_norm": 1.3425848589171392, "learning_rate": 5.081956402871741e-06, "loss": 0.274, "step": 5240 }, { "epoch": 0.5100729927007299, "grad_norm": 1.3989700922846977, "learning_rate": 5.080380455321406e-06, "loss": 0.5732, "step": 5241 }, { "epoch": 0.5101703163017032, "grad_norm": 1.5860258932802054, "learning_rate": 5.078804499783616e-06, "loss": 0.6319, "step": 5242 }, { "epoch": 0.5102676399026764, "grad_norm": 1.3338998346083977, "learning_rate": 5.077228536414973e-06, "loss": 0.3491, "step": 5243 }, { "epoch": 0.5103649635036497, "grad_norm": 1.3993635042032706, "learning_rate": 5.075652565372085e-06, "loss": 0.3831, "step": 5244 }, { "epoch": 0.5104622871046228, "grad_norm": 1.390036940804176, "learning_rate": 5.074076586811554e-06, "loss": 0.4699, "step": 5245 }, { "epoch": 0.5105596107055961, "grad_norm": 1.4230846248188307, "learning_rate": 5.072500600889987e-06, "loss": 0.3224, "step": 5246 }, { "epoch": 0.5106569343065693, "grad_norm": 1.3887220070504431, "learning_rate": 5.0709246077639916e-06, "loss": 0.3999, "step": 5247 }, { "epoch": 0.5107542579075426, "grad_norm": 1.4176001371775404, "learning_rate": 5.069348607590173e-06, "loss": 0.4229, "step": 5248 }, { "epoch": 0.5108515815085158, "grad_norm": 1.5911285682486571, "learning_rate": 5.0677726005251415e-06, "loss": 0.3247, "step": 5249 }, { "epoch": 0.5109489051094891, "grad_norm": 1.4656677370001712, "learning_rate": 5.066196586725506e-06, "loss": 0.3794, "step": 5250 }, { "epoch": 0.5110462287104622, "grad_norm": 1.3077387963312597, "learning_rate": 5.064620566347873e-06, "loss": 0.2733, "step": 5251 }, { "epoch": 0.5111435523114355, "grad_norm": 1.5215572393596322, "learning_rate": 5.063044539548856e-06, "loss": 0.4483, "step": 5252 }, { "epoch": 0.5112408759124087, "grad_norm": 1.6044973139991519, "learning_rate": 5.061468506485062e-06, "loss": 0.388, "step": 5253 }, { "epoch": 0.511338199513382, "grad_norm": 1.5795254678004327, "learning_rate": 5.059892467313108e-06, "loss": 0.4996, "step": 5254 }, { "epoch": 0.5114355231143553, "grad_norm": 1.3418824281029393, "learning_rate": 5.058316422189601e-06, "loss": 0.3284, "step": 5255 }, { "epoch": 0.5115328467153285, "grad_norm": 1.3351414857676405, "learning_rate": 5.056740371271156e-06, "loss": 0.4204, "step": 5256 }, { "epoch": 0.5116301703163018, "grad_norm": 1.1698063202662288, "learning_rate": 5.055164314714386e-06, "loss": 0.3215, "step": 5257 }, { "epoch": 0.5117274939172749, "grad_norm": 2.2901542065268994, "learning_rate": 5.053588252675901e-06, "loss": 0.3884, "step": 5258 }, { "epoch": 0.5118248175182482, "grad_norm": 1.6006747134471273, "learning_rate": 5.052012185312322e-06, "loss": 0.61, "step": 5259 }, { "epoch": 0.5119221411192214, "grad_norm": 1.4627634968543248, "learning_rate": 5.0504361127802585e-06, "loss": 0.3961, "step": 5260 }, { "epoch": 0.5120194647201947, "grad_norm": 1.6105826578136704, "learning_rate": 5.048860035236328e-06, "loss": 0.4894, "step": 5261 }, { "epoch": 0.5121167883211679, "grad_norm": 1.4599600393235113, "learning_rate": 5.047283952837146e-06, "loss": 0.5297, "step": 5262 }, { "epoch": 0.5122141119221412, "grad_norm": 1.361239327773337, "learning_rate": 5.045707865739327e-06, "loss": 0.362, "step": 5263 }, { "epoch": 0.5123114355231143, "grad_norm": 1.4699823606821112, "learning_rate": 5.044131774099489e-06, "loss": 0.4075, "step": 5264 }, { "epoch": 0.5124087591240876, "grad_norm": 1.3088432415790754, "learning_rate": 5.042555678074251e-06, "loss": 0.3839, "step": 5265 }, { "epoch": 0.5125060827250608, "grad_norm": 1.624117318087578, "learning_rate": 5.040979577820231e-06, "loss": 0.5712, "step": 5266 }, { "epoch": 0.5126034063260341, "grad_norm": 1.2961648122519063, "learning_rate": 5.039403473494042e-06, "loss": 0.362, "step": 5267 }, { "epoch": 0.5127007299270073, "grad_norm": 1.4685680195502195, "learning_rate": 5.037827365252306e-06, "loss": 0.3314, "step": 5268 }, { "epoch": 0.5127980535279806, "grad_norm": 1.2816964278880971, "learning_rate": 5.036251253251641e-06, "loss": 0.3747, "step": 5269 }, { "epoch": 0.5128953771289537, "grad_norm": 1.5665013006200665, "learning_rate": 5.034675137648669e-06, "loss": 0.5275, "step": 5270 }, { "epoch": 0.512992700729927, "grad_norm": 1.187991141093583, "learning_rate": 5.0330990186000066e-06, "loss": 0.2295, "step": 5271 }, { "epoch": 0.5130900243309002, "grad_norm": 1.4630057642282757, "learning_rate": 5.0315228962622745e-06, "loss": 0.5126, "step": 5272 }, { "epoch": 0.5131873479318735, "grad_norm": 1.1695969883568185, "learning_rate": 5.029946770792091e-06, "loss": 0.3545, "step": 5273 }, { "epoch": 0.5132846715328467, "grad_norm": 1.5783866438813425, "learning_rate": 5.02837064234608e-06, "loss": 0.2771, "step": 5274 }, { "epoch": 0.51338199513382, "grad_norm": 1.3160515372408526, "learning_rate": 5.02679451108086e-06, "loss": 0.3071, "step": 5275 }, { "epoch": 0.5134793187347931, "grad_norm": 1.5085722437253624, "learning_rate": 5.025218377153054e-06, "loss": 0.4331, "step": 5276 }, { "epoch": 0.5135766423357664, "grad_norm": 1.1762146515353866, "learning_rate": 5.023642240719282e-06, "loss": 0.2547, "step": 5277 }, { "epoch": 0.5136739659367396, "grad_norm": 1.5173333362139103, "learning_rate": 5.022066101936166e-06, "loss": 0.4665, "step": 5278 }, { "epoch": 0.5137712895377129, "grad_norm": 1.675836237647703, "learning_rate": 5.020489960960327e-06, "loss": 0.5712, "step": 5279 }, { "epoch": 0.5138686131386861, "grad_norm": 1.6130314742137548, "learning_rate": 5.018913817948388e-06, "loss": 0.3821, "step": 5280 }, { "epoch": 0.5139659367396594, "grad_norm": 1.3104199775852088, "learning_rate": 5.017337673056972e-06, "loss": 0.3385, "step": 5281 }, { "epoch": 0.5140632603406327, "grad_norm": 1.4924240324281064, "learning_rate": 5.015761526442701e-06, "loss": 0.4171, "step": 5282 }, { "epoch": 0.5141605839416058, "grad_norm": 1.3479665205558893, "learning_rate": 5.0141853782621985e-06, "loss": 0.2608, "step": 5283 }, { "epoch": 0.514257907542579, "grad_norm": 1.4408433430141794, "learning_rate": 5.012609228672084e-06, "loss": 0.3956, "step": 5284 }, { "epoch": 0.5143552311435523, "grad_norm": 1.7542815284847701, "learning_rate": 5.011033077828983e-06, "loss": 0.648, "step": 5285 }, { "epoch": 0.5144525547445256, "grad_norm": 1.2458564747646057, "learning_rate": 5.00945692588952e-06, "loss": 0.2349, "step": 5286 }, { "epoch": 0.5145498783454988, "grad_norm": 1.4077158176023123, "learning_rate": 5.0078807730103156e-06, "loss": 0.3964, "step": 5287 }, { "epoch": 0.5146472019464721, "grad_norm": 1.0361485745222532, "learning_rate": 5.006304619347994e-06, "loss": 0.2808, "step": 5288 }, { "epoch": 0.5147445255474452, "grad_norm": 1.4391256345470669, "learning_rate": 5.004728465059178e-06, "loss": 0.3177, "step": 5289 }, { "epoch": 0.5148418491484185, "grad_norm": 1.4253934149846805, "learning_rate": 5.003152310300491e-06, "loss": 0.4867, "step": 5290 }, { "epoch": 0.5149391727493917, "grad_norm": 1.3615984217344341, "learning_rate": 5.001576155228557e-06, "loss": 0.4332, "step": 5291 }, { "epoch": 0.515036496350365, "grad_norm": 1.120942427893155, "learning_rate": 5e-06, "loss": 0.1725, "step": 5292 }, { "epoch": 0.5151338199513382, "grad_norm": 1.177574032928437, "learning_rate": 4.998423844771444e-06, "loss": 0.251, "step": 5293 }, { "epoch": 0.5152311435523115, "grad_norm": 1.1411912635988943, "learning_rate": 4.996847689699511e-06, "loss": 0.2571, "step": 5294 }, { "epoch": 0.5153284671532846, "grad_norm": 1.2861568185105374, "learning_rate": 4.995271534940825e-06, "loss": 0.392, "step": 5295 }, { "epoch": 0.5154257907542579, "grad_norm": 1.245600827102656, "learning_rate": 4.993695380652008e-06, "loss": 0.3169, "step": 5296 }, { "epoch": 0.5155231143552311, "grad_norm": 1.2725138678853647, "learning_rate": 4.992119226989685e-06, "loss": 0.4123, "step": 5297 }, { "epoch": 0.5156204379562044, "grad_norm": 1.1625577173979076, "learning_rate": 4.990543074110483e-06, "loss": 0.3327, "step": 5298 }, { "epoch": 0.5157177615571776, "grad_norm": 1.207443759390374, "learning_rate": 4.9889669221710186e-06, "loss": 0.3293, "step": 5299 }, { "epoch": 0.5158150851581509, "grad_norm": 1.0668765417891246, "learning_rate": 4.987390771327917e-06, "loss": 0.2386, "step": 5300 }, { "epoch": 0.5159124087591241, "grad_norm": 1.3624658632814173, "learning_rate": 4.985814621737803e-06, "loss": 0.3544, "step": 5301 }, { "epoch": 0.5160097323600973, "grad_norm": 1.5246947749129098, "learning_rate": 4.9842384735573e-06, "loss": 0.4873, "step": 5302 }, { "epoch": 0.5161070559610705, "grad_norm": 1.5373783869834368, "learning_rate": 4.9826623269430286e-06, "loss": 0.4964, "step": 5303 }, { "epoch": 0.5162043795620438, "grad_norm": 1.5948966893906018, "learning_rate": 4.981086182051612e-06, "loss": 0.4453, "step": 5304 }, { "epoch": 0.516301703163017, "grad_norm": 1.637733600057433, "learning_rate": 4.979510039039674e-06, "loss": 0.4774, "step": 5305 }, { "epoch": 0.5163990267639903, "grad_norm": 1.4623466672153929, "learning_rate": 4.977933898063836e-06, "loss": 0.4613, "step": 5306 }, { "epoch": 0.5164963503649636, "grad_norm": 1.1544349445832902, "learning_rate": 4.9763577592807195e-06, "loss": 0.283, "step": 5307 }, { "epoch": 0.5165936739659367, "grad_norm": 1.4818676300702187, "learning_rate": 4.974781622846946e-06, "loss": 0.4101, "step": 5308 }, { "epoch": 0.51669099756691, "grad_norm": 1.277791589279903, "learning_rate": 4.973205488919141e-06, "loss": 0.346, "step": 5309 }, { "epoch": 0.5167883211678832, "grad_norm": 1.2007044433173495, "learning_rate": 4.971629357653922e-06, "loss": 0.3009, "step": 5310 }, { "epoch": 0.5168856447688565, "grad_norm": 1.4503422826410344, "learning_rate": 4.97005322920791e-06, "loss": 0.4332, "step": 5311 }, { "epoch": 0.5169829683698297, "grad_norm": 1.3530624974287928, "learning_rate": 4.968477103737728e-06, "loss": 0.4242, "step": 5312 }, { "epoch": 0.517080291970803, "grad_norm": 1.5703917526059643, "learning_rate": 4.966900981399995e-06, "loss": 0.5539, "step": 5313 }, { "epoch": 0.5171776155717761, "grad_norm": 1.4475897916308973, "learning_rate": 4.965324862351333e-06, "loss": 0.3058, "step": 5314 }, { "epoch": 0.5172749391727494, "grad_norm": 1.8442399618571903, "learning_rate": 4.963748746748359e-06, "loss": 0.4812, "step": 5315 }, { "epoch": 0.5173722627737226, "grad_norm": 1.5981610160651096, "learning_rate": 4.962172634747695e-06, "loss": 0.4771, "step": 5316 }, { "epoch": 0.5174695863746959, "grad_norm": 1.3855322261409106, "learning_rate": 4.96059652650596e-06, "loss": 0.4074, "step": 5317 }, { "epoch": 0.5175669099756691, "grad_norm": 1.4602293858548268, "learning_rate": 4.959020422179771e-06, "loss": 0.4845, "step": 5318 }, { "epoch": 0.5176642335766424, "grad_norm": 1.6579157531970967, "learning_rate": 4.957444321925748e-06, "loss": 0.4785, "step": 5319 }, { "epoch": 0.5177615571776155, "grad_norm": 1.4080106098360512, "learning_rate": 4.955868225900512e-06, "loss": 0.5201, "step": 5320 }, { "epoch": 0.5178588807785888, "grad_norm": 1.6072174721003103, "learning_rate": 4.954292134260675e-06, "loss": 0.4127, "step": 5321 }, { "epoch": 0.517956204379562, "grad_norm": 1.6250051351612822, "learning_rate": 4.952716047162855e-06, "loss": 0.4988, "step": 5322 }, { "epoch": 0.5180535279805353, "grad_norm": 1.3265372266035595, "learning_rate": 4.951139964763675e-06, "loss": 0.3624, "step": 5323 }, { "epoch": 0.5181508515815085, "grad_norm": 1.5066553923654853, "learning_rate": 4.949563887219744e-06, "loss": 0.5188, "step": 5324 }, { "epoch": 0.5182481751824818, "grad_norm": 1.2772262472533884, "learning_rate": 4.94798781468768e-06, "loss": 0.3861, "step": 5325 }, { "epoch": 0.518345498783455, "grad_norm": 1.5033158369519337, "learning_rate": 4.9464117473240995e-06, "loss": 0.537, "step": 5326 }, { "epoch": 0.5184428223844282, "grad_norm": 1.5627108622367212, "learning_rate": 4.944835685285616e-06, "loss": 0.3678, "step": 5327 }, { "epoch": 0.5185401459854014, "grad_norm": 1.2039569108862966, "learning_rate": 4.943259628728845e-06, "loss": 0.3805, "step": 5328 }, { "epoch": 0.5186374695863747, "grad_norm": 1.3601407054032584, "learning_rate": 4.941683577810399e-06, "loss": 0.294, "step": 5329 }, { "epoch": 0.5187347931873479, "grad_norm": 1.742643219741679, "learning_rate": 4.940107532686895e-06, "loss": 0.555, "step": 5330 }, { "epoch": 0.5188321167883212, "grad_norm": 1.4838949129832981, "learning_rate": 4.9385314935149385e-06, "loss": 0.4845, "step": 5331 }, { "epoch": 0.5189294403892944, "grad_norm": 1.4315876658221396, "learning_rate": 4.936955460451145e-06, "loss": 0.3971, "step": 5332 }, { "epoch": 0.5190267639902676, "grad_norm": 1.5188978586317299, "learning_rate": 4.935379433652127e-06, "loss": 0.4045, "step": 5333 }, { "epoch": 0.5191240875912408, "grad_norm": 1.872810738707678, "learning_rate": 4.933803413274497e-06, "loss": 0.6562, "step": 5334 }, { "epoch": 0.5192214111922141, "grad_norm": 1.3765965642622116, "learning_rate": 4.93222739947486e-06, "loss": 0.4553, "step": 5335 }, { "epoch": 0.5193187347931874, "grad_norm": 1.4118604005258877, "learning_rate": 4.9306513924098275e-06, "loss": 0.323, "step": 5336 }, { "epoch": 0.5194160583941606, "grad_norm": 1.4505421644339682, "learning_rate": 4.929075392236009e-06, "loss": 0.4844, "step": 5337 }, { "epoch": 0.5195133819951339, "grad_norm": 1.478650186480109, "learning_rate": 4.927499399110014e-06, "loss": 0.365, "step": 5338 }, { "epoch": 0.519610705596107, "grad_norm": 1.5534636314464154, "learning_rate": 4.925923413188447e-06, "loss": 0.4952, "step": 5339 }, { "epoch": 0.5197080291970803, "grad_norm": 1.1642216823214826, "learning_rate": 4.924347434627916e-06, "loss": 0.263, "step": 5340 }, { "epoch": 0.5198053527980535, "grad_norm": 1.4445920231002787, "learning_rate": 4.922771463585029e-06, "loss": 0.3051, "step": 5341 }, { "epoch": 0.5199026763990268, "grad_norm": 1.2772889665060643, "learning_rate": 4.921195500216386e-06, "loss": 0.3481, "step": 5342 }, { "epoch": 0.52, "grad_norm": 1.7273031864035941, "learning_rate": 4.9196195446785946e-06, "loss": 0.3866, "step": 5343 }, { "epoch": 0.5200973236009733, "grad_norm": 1.4867375896340647, "learning_rate": 4.91804359712826e-06, "loss": 0.5459, "step": 5344 }, { "epoch": 0.5201946472019465, "grad_norm": 1.2278391032105154, "learning_rate": 4.916467657721985e-06, "loss": 0.3528, "step": 5345 }, { "epoch": 0.5202919708029197, "grad_norm": 1.3953934683380196, "learning_rate": 4.914891726616367e-06, "loss": 0.2001, "step": 5346 }, { "epoch": 0.5203892944038929, "grad_norm": 1.1224301975356168, "learning_rate": 4.913315803968012e-06, "loss": 0.3356, "step": 5347 }, { "epoch": 0.5204866180048662, "grad_norm": 1.701811887962682, "learning_rate": 4.91173988993352e-06, "loss": 0.6132, "step": 5348 }, { "epoch": 0.5205839416058394, "grad_norm": 1.2358617400087601, "learning_rate": 4.910163984669488e-06, "loss": 0.324, "step": 5349 }, { "epoch": 0.5206812652068127, "grad_norm": 1.2515284730093246, "learning_rate": 4.908588088332515e-06, "loss": 0.3994, "step": 5350 }, { "epoch": 0.5207785888077859, "grad_norm": 1.4126383317035256, "learning_rate": 4.907012201079201e-06, "loss": 0.3223, "step": 5351 }, { "epoch": 0.5208759124087591, "grad_norm": 1.3208070088520005, "learning_rate": 4.905436323066143e-06, "loss": 0.3576, "step": 5352 }, { "epoch": 0.5209732360097323, "grad_norm": 1.297050280388292, "learning_rate": 4.903860454449933e-06, "loss": 0.3447, "step": 5353 }, { "epoch": 0.5210705596107056, "grad_norm": 1.5683955548247523, "learning_rate": 4.90228459538717e-06, "loss": 0.3439, "step": 5354 }, { "epoch": 0.5211678832116788, "grad_norm": 1.2908503301070766, "learning_rate": 4.900708746034447e-06, "loss": 0.3223, "step": 5355 }, { "epoch": 0.5212652068126521, "grad_norm": 1.2697856941531227, "learning_rate": 4.899132906548358e-06, "loss": 0.3964, "step": 5356 }, { "epoch": 0.5213625304136253, "grad_norm": 1.475725693714655, "learning_rate": 4.897557077085493e-06, "loss": 0.5808, "step": 5357 }, { "epoch": 0.5214598540145985, "grad_norm": 1.5730752507111745, "learning_rate": 4.895981257802444e-06, "loss": 0.5628, "step": 5358 }, { "epoch": 0.5215571776155717, "grad_norm": 1.2644855409084375, "learning_rate": 4.894405448855802e-06, "loss": 0.3988, "step": 5359 }, { "epoch": 0.521654501216545, "grad_norm": 1.4471549173244473, "learning_rate": 4.892829650402154e-06, "loss": 0.4499, "step": 5360 }, { "epoch": 0.5217518248175183, "grad_norm": 1.2611962054210362, "learning_rate": 4.891253862598091e-06, "loss": 0.2343, "step": 5361 }, { "epoch": 0.5218491484184915, "grad_norm": 1.466687479015995, "learning_rate": 4.889678085600197e-06, "loss": 0.4877, "step": 5362 }, { "epoch": 0.5219464720194648, "grad_norm": 1.3646762563889987, "learning_rate": 4.888102319565062e-06, "loss": 0.3101, "step": 5363 }, { "epoch": 0.522043795620438, "grad_norm": 1.6519749006530624, "learning_rate": 4.886526564649266e-06, "loss": 0.5344, "step": 5364 }, { "epoch": 0.5221411192214112, "grad_norm": 1.3831849539510426, "learning_rate": 4.884950821009395e-06, "loss": 0.4052, "step": 5365 }, { "epoch": 0.5222384428223844, "grad_norm": 1.97221130791929, "learning_rate": 4.883375088802035e-06, "loss": 0.5541, "step": 5366 }, { "epoch": 0.5223357664233577, "grad_norm": 1.1782514408166704, "learning_rate": 4.88179936818376e-06, "loss": 0.3286, "step": 5367 }, { "epoch": 0.5224330900243309, "grad_norm": 1.5700750218829738, "learning_rate": 4.8802236593111565e-06, "loss": 0.5532, "step": 5368 }, { "epoch": 0.5225304136253042, "grad_norm": 1.3709483273324035, "learning_rate": 4.878647962340801e-06, "loss": 0.3527, "step": 5369 }, { "epoch": 0.5226277372262774, "grad_norm": 1.4098238143686732, "learning_rate": 4.8770722774292725e-06, "loss": 0.3371, "step": 5370 }, { "epoch": 0.5227250608272506, "grad_norm": 1.290909432387948, "learning_rate": 4.875496604733146e-06, "loss": 0.2814, "step": 5371 }, { "epoch": 0.5228223844282238, "grad_norm": 1.2700354411218717, "learning_rate": 4.873920944408999e-06, "loss": 0.3527, "step": 5372 }, { "epoch": 0.5229197080291971, "grad_norm": 1.3275619210782128, "learning_rate": 4.872345296613405e-06, "loss": 0.2641, "step": 5373 }, { "epoch": 0.5230170316301703, "grad_norm": 1.5426199603306376, "learning_rate": 4.870769661502939e-06, "loss": 0.4184, "step": 5374 }, { "epoch": 0.5231143552311436, "grad_norm": 1.6311268113198076, "learning_rate": 4.86919403923417e-06, "loss": 0.5194, "step": 5375 }, { "epoch": 0.5232116788321168, "grad_norm": 1.3170502216666653, "learning_rate": 4.867618429963669e-06, "loss": 0.297, "step": 5376 }, { "epoch": 0.52330900243309, "grad_norm": 1.5149566120443692, "learning_rate": 4.866042833848009e-06, "loss": 0.3749, "step": 5377 }, { "epoch": 0.5234063260340632, "grad_norm": 1.624425493066898, "learning_rate": 4.864467251043752e-06, "loss": 0.5824, "step": 5378 }, { "epoch": 0.5235036496350365, "grad_norm": 1.4121059982238326, "learning_rate": 4.8628916817074684e-06, "loss": 0.3096, "step": 5379 }, { "epoch": 0.5236009732360097, "grad_norm": 1.4549118049345602, "learning_rate": 4.861316125995722e-06, "loss": 0.2738, "step": 5380 }, { "epoch": 0.523698296836983, "grad_norm": 1.5217440069013852, "learning_rate": 4.859740584065079e-06, "loss": 0.4939, "step": 5381 }, { "epoch": 0.5237956204379562, "grad_norm": 1.3476948608174777, "learning_rate": 4.858165056072099e-06, "loss": 0.2559, "step": 5382 }, { "epoch": 0.5238929440389294, "grad_norm": 1.478647767867753, "learning_rate": 4.856589542173344e-06, "loss": 0.3895, "step": 5383 }, { "epoch": 0.5239902676399026, "grad_norm": 1.568105636726477, "learning_rate": 4.8550140425253764e-06, "loss": 0.6688, "step": 5384 }, { "epoch": 0.5240875912408759, "grad_norm": 1.572433026802508, "learning_rate": 4.853438557284751e-06, "loss": 0.287, "step": 5385 }, { "epoch": 0.5241849148418491, "grad_norm": 1.5713408030733105, "learning_rate": 4.851863086608026e-06, "loss": 0.4803, "step": 5386 }, { "epoch": 0.5242822384428224, "grad_norm": 1.7900814170009662, "learning_rate": 4.850287630651757e-06, "loss": 0.4003, "step": 5387 }, { "epoch": 0.5243795620437957, "grad_norm": 1.7120394010733622, "learning_rate": 4.848712189572502e-06, "loss": 0.4937, "step": 5388 }, { "epoch": 0.5244768856447689, "grad_norm": 1.406520902183401, "learning_rate": 4.8471367635268056e-06, "loss": 0.4204, "step": 5389 }, { "epoch": 0.524574209245742, "grad_norm": 1.3796929554815212, "learning_rate": 4.845561352671224e-06, "loss": 0.4107, "step": 5390 }, { "epoch": 0.5246715328467153, "grad_norm": 1.6170913004250942, "learning_rate": 4.843985957162304e-06, "loss": 0.4272, "step": 5391 }, { "epoch": 0.5247688564476886, "grad_norm": 1.3186027697776659, "learning_rate": 4.842410577156599e-06, "loss": 0.3675, "step": 5392 }, { "epoch": 0.5248661800486618, "grad_norm": 1.3083922273699464, "learning_rate": 4.840835212810649e-06, "loss": 0.3653, "step": 5393 }, { "epoch": 0.5249635036496351, "grad_norm": 2.6172644760426738, "learning_rate": 4.839259864281002e-06, "loss": 0.4221, "step": 5394 }, { "epoch": 0.5250608272506083, "grad_norm": 1.5818666725738313, "learning_rate": 4.837684531724202e-06, "loss": 0.5843, "step": 5395 }, { "epoch": 0.5251581508515815, "grad_norm": 1.6978305802627989, "learning_rate": 4.8361092152967896e-06, "loss": 0.5218, "step": 5396 }, { "epoch": 0.5252554744525547, "grad_norm": 1.6514057355698206, "learning_rate": 4.834533915155305e-06, "loss": 0.6913, "step": 5397 }, { "epoch": 0.525352798053528, "grad_norm": 1.6530126926514057, "learning_rate": 4.832958631456286e-06, "loss": 0.4403, "step": 5398 }, { "epoch": 0.5254501216545012, "grad_norm": 1.3228907945561075, "learning_rate": 4.831383364356274e-06, "loss": 0.342, "step": 5399 }, { "epoch": 0.5255474452554745, "grad_norm": 1.442083349631125, "learning_rate": 4.829808114011798e-06, "loss": 0.353, "step": 5400 }, { "epoch": 0.5256447688564477, "grad_norm": 1.1397565906398535, "learning_rate": 4.828232880579393e-06, "loss": 0.2772, "step": 5401 }, { "epoch": 0.5257420924574209, "grad_norm": 1.8186944146854849, "learning_rate": 4.826657664215596e-06, "loss": 0.3717, "step": 5402 }, { "epoch": 0.5258394160583941, "grad_norm": 1.202086756198358, "learning_rate": 4.825082465076931e-06, "loss": 0.2336, "step": 5403 }, { "epoch": 0.5259367396593674, "grad_norm": 1.1275223429933794, "learning_rate": 4.8235072833199285e-06, "loss": 0.3068, "step": 5404 }, { "epoch": 0.5260340632603406, "grad_norm": 1.3474112025287421, "learning_rate": 4.821932119101116e-06, "loss": 0.3252, "step": 5405 }, { "epoch": 0.5261313868613139, "grad_norm": 1.4563248381806024, "learning_rate": 4.820356972577019e-06, "loss": 0.5844, "step": 5406 }, { "epoch": 0.5262287104622871, "grad_norm": 1.6810836312607915, "learning_rate": 4.81878184390416e-06, "loss": 0.3299, "step": 5407 }, { "epoch": 0.5263260340632604, "grad_norm": 1.3337967881752477, "learning_rate": 4.81720673323906e-06, "loss": 0.2969, "step": 5408 }, { "epoch": 0.5264233576642335, "grad_norm": 1.5825437116085013, "learning_rate": 4.815631640738239e-06, "loss": 0.6384, "step": 5409 }, { "epoch": 0.5265206812652068, "grad_norm": 1.7317820919120244, "learning_rate": 4.814056566558218e-06, "loss": 0.6892, "step": 5410 }, { "epoch": 0.52661800486618, "grad_norm": 1.327109363645959, "learning_rate": 4.812481510855508e-06, "loss": 0.2553, "step": 5411 }, { "epoch": 0.5267153284671533, "grad_norm": 1.285187694779396, "learning_rate": 4.8109064737866235e-06, "loss": 0.3509, "step": 5412 }, { "epoch": 0.5268126520681266, "grad_norm": 1.3226001941679035, "learning_rate": 4.809331455508083e-06, "loss": 0.4332, "step": 5413 }, { "epoch": 0.5269099756690998, "grad_norm": 1.3383124584471806, "learning_rate": 4.807756456176391e-06, "loss": 0.3537, "step": 5414 }, { "epoch": 0.527007299270073, "grad_norm": 1.5958030790776172, "learning_rate": 4.806181475948057e-06, "loss": 0.4861, "step": 5415 }, { "epoch": 0.5271046228710462, "grad_norm": 1.4065320050083008, "learning_rate": 4.8046065149795905e-06, "loss": 0.4506, "step": 5416 }, { "epoch": 0.5272019464720195, "grad_norm": 1.420411750842393, "learning_rate": 4.803031573427495e-06, "loss": 0.233, "step": 5417 }, { "epoch": 0.5272992700729927, "grad_norm": 1.4294795317597138, "learning_rate": 4.801456651448271e-06, "loss": 0.4164, "step": 5418 }, { "epoch": 0.527396593673966, "grad_norm": 1.362469100601773, "learning_rate": 4.799881749198423e-06, "loss": 0.3538, "step": 5419 }, { "epoch": 0.5274939172749392, "grad_norm": 1.3971331603170367, "learning_rate": 4.798306866834448e-06, "loss": 0.3763, "step": 5420 }, { "epoch": 0.5275912408759124, "grad_norm": 1.5699734298012762, "learning_rate": 4.796732004512846e-06, "loss": 0.3529, "step": 5421 }, { "epoch": 0.5276885644768856, "grad_norm": 1.7138701783939359, "learning_rate": 4.795157162390106e-06, "loss": 0.4578, "step": 5422 }, { "epoch": 0.5277858880778589, "grad_norm": 1.3247389770210467, "learning_rate": 4.793582340622726e-06, "loss": 0.3972, "step": 5423 }, { "epoch": 0.5278832116788321, "grad_norm": 1.5437024653602935, "learning_rate": 4.792007539367198e-06, "loss": 0.5013, "step": 5424 }, { "epoch": 0.5279805352798054, "grad_norm": 1.4497454321128689, "learning_rate": 4.790432758780006e-06, "loss": 0.4699, "step": 5425 }, { "epoch": 0.5280778588807786, "grad_norm": 1.3974542837564343, "learning_rate": 4.78885799901764e-06, "loss": 0.4862, "step": 5426 }, { "epoch": 0.5281751824817518, "grad_norm": 1.3731823524717879, "learning_rate": 4.7872832602365845e-06, "loss": 0.3659, "step": 5427 }, { "epoch": 0.528272506082725, "grad_norm": 1.5664970390664423, "learning_rate": 4.785708542593323e-06, "loss": 0.2403, "step": 5428 }, { "epoch": 0.5283698296836983, "grad_norm": 1.380386293082662, "learning_rate": 4.784133846244334e-06, "loss": 0.485, "step": 5429 }, { "epoch": 0.5284671532846715, "grad_norm": 1.5356984818871535, "learning_rate": 4.7825591713460985e-06, "loss": 0.2544, "step": 5430 }, { "epoch": 0.5285644768856448, "grad_norm": 1.4034684540370157, "learning_rate": 4.780984518055093e-06, "loss": 0.348, "step": 5431 }, { "epoch": 0.528661800486618, "grad_norm": 1.3435931446878362, "learning_rate": 4.779409886527787e-06, "loss": 0.3918, "step": 5432 }, { "epoch": 0.5287591240875913, "grad_norm": 1.7290638199826511, "learning_rate": 4.777835276920658e-06, "loss": 0.3752, "step": 5433 }, { "epoch": 0.5288564476885644, "grad_norm": 1.3964564607404608, "learning_rate": 4.776260689390174e-06, "loss": 0.4607, "step": 5434 }, { "epoch": 0.5289537712895377, "grad_norm": 1.5752706870334248, "learning_rate": 4.774686124092805e-06, "loss": 0.5425, "step": 5435 }, { "epoch": 0.5290510948905109, "grad_norm": 1.6222659365687908, "learning_rate": 4.773111581185011e-06, "loss": 0.4261, "step": 5436 }, { "epoch": 0.5291484184914842, "grad_norm": 1.5325806733097729, "learning_rate": 4.77153706082326e-06, "loss": 0.4287, "step": 5437 }, { "epoch": 0.5292457420924574, "grad_norm": 1.4178414616055643, "learning_rate": 4.769962563164012e-06, "loss": 0.4252, "step": 5438 }, { "epoch": 0.5293430656934307, "grad_norm": 1.219527363450225, "learning_rate": 4.768388088363726e-06, "loss": 0.3256, "step": 5439 }, { "epoch": 0.5294403892944038, "grad_norm": 1.4044434705679236, "learning_rate": 4.766813636578856e-06, "loss": 0.3532, "step": 5440 }, { "epoch": 0.5295377128953771, "grad_norm": 1.488609858401998, "learning_rate": 4.765239207965859e-06, "loss": 0.5043, "step": 5441 }, { "epoch": 0.5296350364963504, "grad_norm": 1.5286609014215788, "learning_rate": 4.763664802681188e-06, "loss": 0.4806, "step": 5442 }, { "epoch": 0.5297323600973236, "grad_norm": 1.2058920314310113, "learning_rate": 4.762090420881289e-06, "loss": 0.3372, "step": 5443 }, { "epoch": 0.5298296836982969, "grad_norm": 1.4678890058409144, "learning_rate": 4.760516062722611e-06, "loss": 0.3401, "step": 5444 }, { "epoch": 0.5299270072992701, "grad_norm": 1.2043465692225879, "learning_rate": 4.758941728361599e-06, "loss": 0.286, "step": 5445 }, { "epoch": 0.5300243309002433, "grad_norm": 1.431438020574917, "learning_rate": 4.757367417954699e-06, "loss": 0.3746, "step": 5446 }, { "epoch": 0.5301216545012165, "grad_norm": 0.9607556178702715, "learning_rate": 4.7557931316583445e-06, "loss": 0.2165, "step": 5447 }, { "epoch": 0.5302189781021898, "grad_norm": 1.2786048007241864, "learning_rate": 4.754218869628977e-06, "loss": 0.3931, "step": 5448 }, { "epoch": 0.530316301703163, "grad_norm": 1.4084953184852351, "learning_rate": 4.752644632023032e-06, "loss": 0.4777, "step": 5449 }, { "epoch": 0.5304136253041363, "grad_norm": 1.5772624160633437, "learning_rate": 4.751070418996941e-06, "loss": 0.6249, "step": 5450 }, { "epoch": 0.5305109489051095, "grad_norm": 1.5359217940776095, "learning_rate": 4.749496230707135e-06, "loss": 0.4116, "step": 5451 }, { "epoch": 0.5306082725060828, "grad_norm": 1.4236847814504423, "learning_rate": 4.747922067310044e-06, "loss": 0.3391, "step": 5452 }, { "epoch": 0.5307055961070559, "grad_norm": 1.3461221560216945, "learning_rate": 4.746347928962092e-06, "loss": 0.303, "step": 5453 }, { "epoch": 0.5308029197080292, "grad_norm": 1.5420013871984513, "learning_rate": 4.744773815819702e-06, "loss": 0.4648, "step": 5454 }, { "epoch": 0.5309002433090024, "grad_norm": 1.6829548130055092, "learning_rate": 4.743199728039294e-06, "loss": 0.5981, "step": 5455 }, { "epoch": 0.5309975669099757, "grad_norm": 1.939319310177043, "learning_rate": 4.741625665777287e-06, "loss": 0.2136, "step": 5456 }, { "epoch": 0.5310948905109489, "grad_norm": 1.3623424456432953, "learning_rate": 4.740051629190099e-06, "loss": 0.3761, "step": 5457 }, { "epoch": 0.5311922141119222, "grad_norm": 1.6252547577919036, "learning_rate": 4.738477618434139e-06, "loss": 0.4033, "step": 5458 }, { "epoch": 0.5312895377128953, "grad_norm": 1.3668336246823372, "learning_rate": 4.736903633665817e-06, "loss": 0.3116, "step": 5459 }, { "epoch": 0.5313868613138686, "grad_norm": 1.1925691526183526, "learning_rate": 4.735329675041545e-06, "loss": 0.2819, "step": 5460 }, { "epoch": 0.5314841849148418, "grad_norm": 1.1214266292808592, "learning_rate": 4.733755742717724e-06, "loss": 0.2096, "step": 5461 }, { "epoch": 0.5315815085158151, "grad_norm": 1.381929708473621, "learning_rate": 4.732181836850759e-06, "loss": 0.3632, "step": 5462 }, { "epoch": 0.5316788321167883, "grad_norm": 1.458382151558617, "learning_rate": 4.730607957597049e-06, "loss": 0.2973, "step": 5463 }, { "epoch": 0.5317761557177616, "grad_norm": 1.2169151187468834, "learning_rate": 4.729034105112994e-06, "loss": 0.1986, "step": 5464 }, { "epoch": 0.5318734793187347, "grad_norm": 1.3161584637902233, "learning_rate": 4.727460279554984e-06, "loss": 0.3633, "step": 5465 }, { "epoch": 0.531970802919708, "grad_norm": 1.7241663812065842, "learning_rate": 4.725886481079414e-06, "loss": 0.3693, "step": 5466 }, { "epoch": 0.5320681265206813, "grad_norm": 1.3926831562940198, "learning_rate": 4.724312709842676e-06, "loss": 0.5849, "step": 5467 }, { "epoch": 0.5321654501216545, "grad_norm": 1.151762993800745, "learning_rate": 4.72273896600115e-06, "loss": 0.3172, "step": 5468 }, { "epoch": 0.5322627737226278, "grad_norm": 1.6789544213557044, "learning_rate": 4.721165249711223e-06, "loss": 0.4716, "step": 5469 }, { "epoch": 0.532360097323601, "grad_norm": 1.5509784870930794, "learning_rate": 4.719591561129278e-06, "loss": 0.604, "step": 5470 }, { "epoch": 0.5324574209245742, "grad_norm": 1.5299002478998982, "learning_rate": 4.7180179004116924e-06, "loss": 0.4283, "step": 5471 }, { "epoch": 0.5325547445255474, "grad_norm": 1.4384498774998542, "learning_rate": 4.716444267714841e-06, "loss": 0.3953, "step": 5472 }, { "epoch": 0.5326520681265207, "grad_norm": 1.6558761174677732, "learning_rate": 4.714870663195096e-06, "loss": 0.3006, "step": 5473 }, { "epoch": 0.5327493917274939, "grad_norm": 1.8723588779783473, "learning_rate": 4.713297087008828e-06, "loss": 0.5612, "step": 5474 }, { "epoch": 0.5328467153284672, "grad_norm": 1.5086914821044224, "learning_rate": 4.711723539312407e-06, "loss": 0.3611, "step": 5475 }, { "epoch": 0.5329440389294404, "grad_norm": 1.3672828859024702, "learning_rate": 4.710150020262194e-06, "loss": 0.3355, "step": 5476 }, { "epoch": 0.5330413625304137, "grad_norm": 1.5872060278291604, "learning_rate": 4.708576530014551e-06, "loss": 0.551, "step": 5477 }, { "epoch": 0.5331386861313868, "grad_norm": 1.2317312878100395, "learning_rate": 4.707003068725839e-06, "loss": 0.2379, "step": 5478 }, { "epoch": 0.5332360097323601, "grad_norm": 1.5485872752470256, "learning_rate": 4.705429636552411e-06, "loss": 0.4116, "step": 5479 }, { "epoch": 0.5333333333333333, "grad_norm": 1.394266055239885, "learning_rate": 4.703856233650621e-06, "loss": 0.3408, "step": 5480 }, { "epoch": 0.5334306569343066, "grad_norm": 1.2707129040965355, "learning_rate": 4.702282860176818e-06, "loss": 0.3329, "step": 5481 }, { "epoch": 0.5335279805352798, "grad_norm": 1.148637922794862, "learning_rate": 4.7007095162873525e-06, "loss": 0.2492, "step": 5482 }, { "epoch": 0.5336253041362531, "grad_norm": 1.432055320891234, "learning_rate": 4.699136202138565e-06, "loss": 0.3254, "step": 5483 }, { "epoch": 0.5337226277372262, "grad_norm": 1.5691038576110405, "learning_rate": 4.697562917886798e-06, "loss": 0.4628, "step": 5484 }, { "epoch": 0.5338199513381995, "grad_norm": 1.4163562023075924, "learning_rate": 4.69598966368839e-06, "loss": 0.4073, "step": 5485 }, { "epoch": 0.5339172749391727, "grad_norm": 1.290810065128507, "learning_rate": 4.694416439699676e-06, "loss": 0.3465, "step": 5486 }, { "epoch": 0.534014598540146, "grad_norm": 1.4067443826338528, "learning_rate": 4.692843246076988e-06, "loss": 0.4324, "step": 5487 }, { "epoch": 0.5341119221411192, "grad_norm": 1.6253002329552109, "learning_rate": 4.691270082976655e-06, "loss": 0.4271, "step": 5488 }, { "epoch": 0.5342092457420925, "grad_norm": 1.6775288750200037, "learning_rate": 4.689696950555006e-06, "loss": 0.5078, "step": 5489 }, { "epoch": 0.5343065693430656, "grad_norm": 1.2291968659823704, "learning_rate": 4.6881238489683596e-06, "loss": 0.2067, "step": 5490 }, { "epoch": 0.5344038929440389, "grad_norm": 1.1572029723004311, "learning_rate": 4.686550778373037e-06, "loss": 0.3056, "step": 5491 }, { "epoch": 0.5345012165450121, "grad_norm": 1.6140721486071878, "learning_rate": 4.684977738925357e-06, "loss": 0.294, "step": 5492 }, { "epoch": 0.5345985401459854, "grad_norm": 1.314893492892741, "learning_rate": 4.683404730781635e-06, "loss": 0.2931, "step": 5493 }, { "epoch": 0.5346958637469587, "grad_norm": 1.3510129395633992, "learning_rate": 4.6818317540981775e-06, "loss": 0.3709, "step": 5494 }, { "epoch": 0.5347931873479319, "grad_norm": 1.740798327926576, "learning_rate": 4.6802588090312935e-06, "loss": 0.3371, "step": 5495 }, { "epoch": 0.5348905109489052, "grad_norm": 1.1472362022276765, "learning_rate": 4.6786858957372905e-06, "loss": 0.2323, "step": 5496 }, { "epoch": 0.5349878345498783, "grad_norm": 1.6487002773465846, "learning_rate": 4.6771130143724654e-06, "loss": 0.5127, "step": 5497 }, { "epoch": 0.5350851581508516, "grad_norm": 1.1265346867571007, "learning_rate": 4.675540165093119e-06, "loss": 0.3499, "step": 5498 }, { "epoch": 0.5351824817518248, "grad_norm": 1.526197622964153, "learning_rate": 4.673967348055546e-06, "loss": 0.4363, "step": 5499 }, { "epoch": 0.5352798053527981, "grad_norm": 1.2805452967462896, "learning_rate": 4.67239456341604e-06, "loss": 0.4261, "step": 5500 }, { "epoch": 0.5353771289537713, "grad_norm": 1.6046810398881801, "learning_rate": 4.670821811330884e-06, "loss": 0.4014, "step": 5501 }, { "epoch": 0.5354744525547446, "grad_norm": 1.3047924342186952, "learning_rate": 4.669249091956368e-06, "loss": 0.2704, "step": 5502 }, { "epoch": 0.5355717761557177, "grad_norm": 1.4224088735505336, "learning_rate": 4.667676405448776e-06, "loss": 0.4957, "step": 5503 }, { "epoch": 0.535669099756691, "grad_norm": 1.258935225874021, "learning_rate": 4.666103751964381e-06, "loss": 0.368, "step": 5504 }, { "epoch": 0.5357664233576642, "grad_norm": 1.4746017759013634, "learning_rate": 4.664531131659461e-06, "loss": 0.3029, "step": 5505 }, { "epoch": 0.5358637469586375, "grad_norm": 2.2111545631424576, "learning_rate": 4.66295854469029e-06, "loss": 0.5238, "step": 5506 }, { "epoch": 0.5359610705596107, "grad_norm": 1.2922254194717746, "learning_rate": 4.661385991213135e-06, "loss": 0.3183, "step": 5507 }, { "epoch": 0.536058394160584, "grad_norm": 1.5073042887713033, "learning_rate": 4.6598134713842625e-06, "loss": 0.481, "step": 5508 }, { "epoch": 0.5361557177615571, "grad_norm": 1.6172418999851652, "learning_rate": 4.658240985359934e-06, "loss": 0.4367, "step": 5509 }, { "epoch": 0.5362530413625304, "grad_norm": 1.2326762425071527, "learning_rate": 4.656668533296409e-06, "loss": 0.2636, "step": 5510 }, { "epoch": 0.5363503649635036, "grad_norm": 1.4249179682627615, "learning_rate": 4.655096115349943e-06, "loss": 0.3937, "step": 5511 }, { "epoch": 0.5364476885644769, "grad_norm": 1.6515521234134023, "learning_rate": 4.653523731676788e-06, "loss": 0.7111, "step": 5512 }, { "epoch": 0.5365450121654501, "grad_norm": 1.9900282583674413, "learning_rate": 4.651951382433193e-06, "loss": 0.3396, "step": 5513 }, { "epoch": 0.5366423357664234, "grad_norm": 1.012284934196896, "learning_rate": 4.650379067775404e-06, "loss": 0.1941, "step": 5514 }, { "epoch": 0.5367396593673966, "grad_norm": 1.5676311925959359, "learning_rate": 4.64880678785966e-06, "loss": 0.5501, "step": 5515 }, { "epoch": 0.5368369829683698, "grad_norm": 1.439297257451148, "learning_rate": 4.647234542842203e-06, "loss": 0.4706, "step": 5516 }, { "epoch": 0.536934306569343, "grad_norm": 1.6521861765513306, "learning_rate": 4.645662332879264e-06, "loss": 0.4775, "step": 5517 }, { "epoch": 0.5370316301703163, "grad_norm": 1.4252575056075478, "learning_rate": 4.644090158127079e-06, "loss": 0.4707, "step": 5518 }, { "epoch": 0.5371289537712896, "grad_norm": 3.029626785307384, "learning_rate": 4.642518018741873e-06, "loss": 0.2853, "step": 5519 }, { "epoch": 0.5372262773722628, "grad_norm": 1.4336668406467705, "learning_rate": 4.64094591487987e-06, "loss": 0.4253, "step": 5520 }, { "epoch": 0.5373236009732361, "grad_norm": 1.2542843827843542, "learning_rate": 4.639373846697295e-06, "loss": 0.4461, "step": 5521 }, { "epoch": 0.5374209245742092, "grad_norm": 1.7471595600632224, "learning_rate": 4.63780181435036e-06, "loss": 0.5609, "step": 5522 }, { "epoch": 0.5375182481751825, "grad_norm": 1.382828706980413, "learning_rate": 4.636229817995281e-06, "loss": 0.3993, "step": 5523 }, { "epoch": 0.5376155717761557, "grad_norm": 1.1783662179749346, "learning_rate": 4.63465785778827e-06, "loss": 0.2375, "step": 5524 }, { "epoch": 0.537712895377129, "grad_norm": 1.1098812184057025, "learning_rate": 4.633085933885533e-06, "loss": 0.2646, "step": 5525 }, { "epoch": 0.5378102189781022, "grad_norm": 1.2965070823067013, "learning_rate": 4.631514046443271e-06, "loss": 0.2288, "step": 5526 }, { "epoch": 0.5379075425790755, "grad_norm": 1.554738106215467, "learning_rate": 4.6299421956176846e-06, "loss": 0.475, "step": 5527 }, { "epoch": 0.5380048661800486, "grad_norm": 1.7108794598772463, "learning_rate": 4.62837038156497e-06, "loss": 0.5758, "step": 5528 }, { "epoch": 0.5381021897810219, "grad_norm": 1.4189529576262987, "learning_rate": 4.626798604441319e-06, "loss": 0.446, "step": 5529 }, { "epoch": 0.5381995133819951, "grad_norm": 1.539521589404697, "learning_rate": 4.625226864402919e-06, "loss": 0.4016, "step": 5530 }, { "epoch": 0.5382968369829684, "grad_norm": 1.6995404519914559, "learning_rate": 4.623655161605957e-06, "loss": 0.3043, "step": 5531 }, { "epoch": 0.5383941605839416, "grad_norm": 1.805961456483182, "learning_rate": 4.622083496206614e-06, "loss": 0.4687, "step": 5532 }, { "epoch": 0.5384914841849149, "grad_norm": 1.3127929269793055, "learning_rate": 4.620511868361064e-06, "loss": 0.4509, "step": 5533 }, { "epoch": 0.538588807785888, "grad_norm": 1.2341439719027067, "learning_rate": 4.618940278225484e-06, "loss": 0.3663, "step": 5534 }, { "epoch": 0.5386861313868613, "grad_norm": 1.8175411493609606, "learning_rate": 4.617368725956043e-06, "loss": 0.3676, "step": 5535 }, { "epoch": 0.5387834549878345, "grad_norm": 2.0661946352391194, "learning_rate": 4.615797211708908e-06, "loss": 0.2821, "step": 5536 }, { "epoch": 0.5388807785888078, "grad_norm": 1.2776192184330863, "learning_rate": 4.614225735640238e-06, "loss": 0.3573, "step": 5537 }, { "epoch": 0.538978102189781, "grad_norm": 1.19012757771488, "learning_rate": 4.612654297906194e-06, "loss": 0.2752, "step": 5538 }, { "epoch": 0.5390754257907543, "grad_norm": 1.4951729194125682, "learning_rate": 4.611082898662932e-06, "loss": 0.3265, "step": 5539 }, { "epoch": 0.5391727493917275, "grad_norm": 1.7553617219847182, "learning_rate": 4.6095115380666e-06, "loss": 0.6098, "step": 5540 }, { "epoch": 0.5392700729927007, "grad_norm": 1.9351011623385188, "learning_rate": 4.607940216273347e-06, "loss": 0.4677, "step": 5541 }, { "epoch": 0.5393673965936739, "grad_norm": 1.6215327024782136, "learning_rate": 4.606368933439315e-06, "loss": 0.5683, "step": 5542 }, { "epoch": 0.5394647201946472, "grad_norm": 1.6944511792130725, "learning_rate": 4.604797689720645e-06, "loss": 0.2309, "step": 5543 }, { "epoch": 0.5395620437956204, "grad_norm": 1.2236426521264896, "learning_rate": 4.603226485273471e-06, "loss": 0.2641, "step": 5544 }, { "epoch": 0.5396593673965937, "grad_norm": 1.5572973993338344, "learning_rate": 4.601655320253925e-06, "loss": 0.5321, "step": 5545 }, { "epoch": 0.539756690997567, "grad_norm": 1.572086982766627, "learning_rate": 4.600084194818134e-06, "loss": 0.3651, "step": 5546 }, { "epoch": 0.5398540145985401, "grad_norm": 1.4415785937152057, "learning_rate": 4.598513109122226e-06, "loss": 0.3424, "step": 5547 }, { "epoch": 0.5399513381995134, "grad_norm": 1.1637778734255635, "learning_rate": 4.596942063322314e-06, "loss": 0.3374, "step": 5548 }, { "epoch": 0.5400486618004866, "grad_norm": 1.324226581921668, "learning_rate": 4.595371057574517e-06, "loss": 0.3143, "step": 5549 }, { "epoch": 0.5401459854014599, "grad_norm": 1.3513031639064905, "learning_rate": 4.593800092034947e-06, "loss": 0.5002, "step": 5550 }, { "epoch": 0.5402433090024331, "grad_norm": 1.510265351531761, "learning_rate": 4.5922291668597105e-06, "loss": 0.4232, "step": 5551 }, { "epoch": 0.5403406326034064, "grad_norm": 1.374401135510563, "learning_rate": 4.590658282204913e-06, "loss": 0.361, "step": 5552 }, { "epoch": 0.5404379562043795, "grad_norm": 1.3851090235439831, "learning_rate": 4.5890874382266535e-06, "loss": 0.3712, "step": 5553 }, { "epoch": 0.5405352798053528, "grad_norm": 1.3636680863042874, "learning_rate": 4.58751663508103e-06, "loss": 0.3843, "step": 5554 }, { "epoch": 0.540632603406326, "grad_norm": 1.3296990435555314, "learning_rate": 4.585945872924129e-06, "loss": 0.4047, "step": 5555 }, { "epoch": 0.5407299270072993, "grad_norm": 1.624264550675679, "learning_rate": 4.584375151912043e-06, "loss": 0.3867, "step": 5556 }, { "epoch": 0.5408272506082725, "grad_norm": 1.3684504316145363, "learning_rate": 4.5828044722008515e-06, "loss": 0.3617, "step": 5557 }, { "epoch": 0.5409245742092458, "grad_norm": 1.5271544668466714, "learning_rate": 4.5812338339466395e-06, "loss": 0.4888, "step": 5558 }, { "epoch": 0.541021897810219, "grad_norm": 1.4265749041883962, "learning_rate": 4.579663237305476e-06, "loss": 0.4771, "step": 5559 }, { "epoch": 0.5411192214111922, "grad_norm": 1.509210363535176, "learning_rate": 4.578092682433435e-06, "loss": 0.4779, "step": 5560 }, { "epoch": 0.5412165450121654, "grad_norm": 1.5133305909360217, "learning_rate": 4.576522169486586e-06, "loss": 0.3973, "step": 5561 }, { "epoch": 0.5413138686131387, "grad_norm": 1.6143705002164137, "learning_rate": 4.574951698620987e-06, "loss": 0.3723, "step": 5562 }, { "epoch": 0.5414111922141119, "grad_norm": 1.2292198018803793, "learning_rate": 4.5733812699927e-06, "loss": 0.3499, "step": 5563 }, { "epoch": 0.5415085158150852, "grad_norm": 1.3580361273205916, "learning_rate": 4.571810883757777e-06, "loss": 0.407, "step": 5564 }, { "epoch": 0.5416058394160584, "grad_norm": 1.3493904786417004, "learning_rate": 4.570240540072271e-06, "loss": 0.3004, "step": 5565 }, { "epoch": 0.5417031630170316, "grad_norm": 1.6057393015458046, "learning_rate": 4.568670239092226e-06, "loss": 0.5274, "step": 5566 }, { "epoch": 0.5418004866180048, "grad_norm": 1.3129465161054514, "learning_rate": 4.567099980973684e-06, "loss": 0.2682, "step": 5567 }, { "epoch": 0.5418978102189781, "grad_norm": 1.5646855137657936, "learning_rate": 4.565529765872686e-06, "loss": 0.4151, "step": 5568 }, { "epoch": 0.5419951338199513, "grad_norm": 1.338838472621847, "learning_rate": 4.56395959394526e-06, "loss": 0.3384, "step": 5569 }, { "epoch": 0.5420924574209246, "grad_norm": 1.2489369163341921, "learning_rate": 4.562389465347435e-06, "loss": 0.2837, "step": 5570 }, { "epoch": 0.5421897810218979, "grad_norm": 1.6571482256041326, "learning_rate": 4.56081938023524e-06, "loss": 0.4298, "step": 5571 }, { "epoch": 0.542287104622871, "grad_norm": 1.471288556912, "learning_rate": 4.559249338764695e-06, "loss": 0.4059, "step": 5572 }, { "epoch": 0.5423844282238443, "grad_norm": 1.2544028941551706, "learning_rate": 4.5576793410918115e-06, "loss": 0.372, "step": 5573 }, { "epoch": 0.5424817518248175, "grad_norm": 1.2899575103001732, "learning_rate": 4.556109387372604e-06, "loss": 0.3339, "step": 5574 }, { "epoch": 0.5425790754257908, "grad_norm": 1.6117297972366638, "learning_rate": 4.554539477763079e-06, "loss": 0.3651, "step": 5575 }, { "epoch": 0.542676399026764, "grad_norm": 1.3462531262771449, "learning_rate": 4.552969612419242e-06, "loss": 0.3098, "step": 5576 }, { "epoch": 0.5427737226277373, "grad_norm": 1.2037265809376663, "learning_rate": 4.551399791497087e-06, "loss": 0.2987, "step": 5577 }, { "epoch": 0.5428710462287104, "grad_norm": 1.3939366715986383, "learning_rate": 4.549830015152612e-06, "loss": 0.4183, "step": 5578 }, { "epoch": 0.5429683698296837, "grad_norm": 1.513347290677701, "learning_rate": 4.5482602835418065e-06, "loss": 0.3991, "step": 5579 }, { "epoch": 0.5430656934306569, "grad_norm": 1.1821112586133669, "learning_rate": 4.546690596820652e-06, "loss": 0.3178, "step": 5580 }, { "epoch": 0.5431630170316302, "grad_norm": 1.2066819756592286, "learning_rate": 4.54512095514513e-06, "loss": 0.347, "step": 5581 }, { "epoch": 0.5432603406326034, "grad_norm": 1.5202387165541005, "learning_rate": 4.54355135867122e-06, "loss": 0.4614, "step": 5582 }, { "epoch": 0.5433576642335767, "grad_norm": 1.6204853432853796, "learning_rate": 4.541981807554894e-06, "loss": 0.413, "step": 5583 }, { "epoch": 0.5434549878345499, "grad_norm": 1.3689442710094215, "learning_rate": 4.540412301952116e-06, "loss": 0.3677, "step": 5584 }, { "epoch": 0.5435523114355231, "grad_norm": 1.219397739528809, "learning_rate": 4.538842842018849e-06, "loss": 0.2662, "step": 5585 }, { "epoch": 0.5436496350364963, "grad_norm": 1.4157059534769942, "learning_rate": 4.537273427911053e-06, "loss": 0.3228, "step": 5586 }, { "epoch": 0.5437469586374696, "grad_norm": 1.4212798665178918, "learning_rate": 4.535704059784681e-06, "loss": 0.4971, "step": 5587 }, { "epoch": 0.5438442822384428, "grad_norm": 1.613847462306087, "learning_rate": 4.534134737795682e-06, "loss": 0.6941, "step": 5588 }, { "epoch": 0.5439416058394161, "grad_norm": 1.272085771271991, "learning_rate": 4.532565462099999e-06, "loss": 0.2215, "step": 5589 }, { "epoch": 0.5440389294403893, "grad_norm": 1.3523598672753734, "learning_rate": 4.5309962328535765e-06, "loss": 0.3506, "step": 5590 }, { "epoch": 0.5441362530413625, "grad_norm": 1.78915091883293, "learning_rate": 4.529427050212344e-06, "loss": 0.2881, "step": 5591 }, { "epoch": 0.5442335766423357, "grad_norm": 1.3070852210376076, "learning_rate": 4.527857914332234e-06, "loss": 0.3958, "step": 5592 }, { "epoch": 0.544330900243309, "grad_norm": 1.2331174891417618, "learning_rate": 4.526288825369175e-06, "loss": 0.3897, "step": 5593 }, { "epoch": 0.5444282238442822, "grad_norm": 1.449725615092237, "learning_rate": 4.524719783479088e-06, "loss": 0.309, "step": 5594 }, { "epoch": 0.5445255474452555, "grad_norm": 1.2391040847938808, "learning_rate": 4.523150788817886e-06, "loss": 0.3107, "step": 5595 }, { "epoch": 0.5446228710462288, "grad_norm": 1.6561577969203813, "learning_rate": 4.521581841541483e-06, "loss": 0.4173, "step": 5596 }, { "epoch": 0.5447201946472019, "grad_norm": 1.3282576550823684, "learning_rate": 4.5200129418057885e-06, "loss": 0.3216, "step": 5597 }, { "epoch": 0.5448175182481751, "grad_norm": 1.4024081414615832, "learning_rate": 4.518444089766701e-06, "loss": 0.3722, "step": 5598 }, { "epoch": 0.5449148418491484, "grad_norm": 1.5089106538535892, "learning_rate": 4.516875285580121e-06, "loss": 0.303, "step": 5599 }, { "epoch": 0.5450121654501217, "grad_norm": 1.5031317760291008, "learning_rate": 4.51530652940194e-06, "loss": 0.4373, "step": 5600 }, { "epoch": 0.5451094890510949, "grad_norm": 1.471008431203656, "learning_rate": 4.513737821388049e-06, "loss": 0.4556, "step": 5601 }, { "epoch": 0.5452068126520682, "grad_norm": 1.7413080844105027, "learning_rate": 4.512169161694328e-06, "loss": 0.5666, "step": 5602 }, { "epoch": 0.5453041362530414, "grad_norm": 1.8071416376670888, "learning_rate": 4.510600550476657e-06, "loss": 0.3715, "step": 5603 }, { "epoch": 0.5454014598540146, "grad_norm": 1.476639760735993, "learning_rate": 4.509031987890913e-06, "loss": 0.3323, "step": 5604 }, { "epoch": 0.5454987834549878, "grad_norm": 1.3999815360622823, "learning_rate": 4.507463474092959e-06, "loss": 0.3506, "step": 5605 }, { "epoch": 0.5455961070559611, "grad_norm": 1.4655325528983436, "learning_rate": 4.505895009238663e-06, "loss": 0.4597, "step": 5606 }, { "epoch": 0.5456934306569343, "grad_norm": 1.1327268370762924, "learning_rate": 4.504326593483883e-06, "loss": 0.3451, "step": 5607 }, { "epoch": 0.5457907542579076, "grad_norm": 1.2823893435741631, "learning_rate": 4.502758226984477e-06, "loss": 0.2785, "step": 5608 }, { "epoch": 0.5458880778588808, "grad_norm": 1.5753262318211483, "learning_rate": 4.501189909896289e-06, "loss": 0.269, "step": 5609 }, { "epoch": 0.545985401459854, "grad_norm": 1.5009827573371675, "learning_rate": 4.499621642375166e-06, "loss": 0.4056, "step": 5610 }, { "epoch": 0.5460827250608272, "grad_norm": 1.4636375241999275, "learning_rate": 4.498053424576949e-06, "loss": 0.451, "step": 5611 }, { "epoch": 0.5461800486618005, "grad_norm": 1.5065599684505087, "learning_rate": 4.496485256657472e-06, "loss": 0.4837, "step": 5612 }, { "epoch": 0.5462773722627737, "grad_norm": 1.291365998662591, "learning_rate": 4.4949171387725636e-06, "loss": 0.3273, "step": 5613 }, { "epoch": 0.546374695863747, "grad_norm": 1.5794146716287192, "learning_rate": 4.4933490710780495e-06, "loss": 0.4363, "step": 5614 }, { "epoch": 0.5464720194647202, "grad_norm": 1.5379662651856578, "learning_rate": 4.491781053729752e-06, "loss": 0.5449, "step": 5615 }, { "epoch": 0.5465693430656934, "grad_norm": 1.322945266574682, "learning_rate": 4.490213086883482e-06, "loss": 0.4107, "step": 5616 }, { "epoch": 0.5466666666666666, "grad_norm": 1.3543729282674888, "learning_rate": 4.48864517069505e-06, "loss": 0.4165, "step": 5617 }, { "epoch": 0.5467639902676399, "grad_norm": 1.5852531121498197, "learning_rate": 4.487077305320261e-06, "loss": 0.6354, "step": 5618 }, { "epoch": 0.5468613138686131, "grad_norm": 1.3876975460155319, "learning_rate": 4.4855094909149175e-06, "loss": 0.2162, "step": 5619 }, { "epoch": 0.5469586374695864, "grad_norm": 1.3842422077220742, "learning_rate": 4.483941727634811e-06, "loss": 0.4646, "step": 5620 }, { "epoch": 0.5470559610705596, "grad_norm": 1.628949720502462, "learning_rate": 4.482374015635733e-06, "loss": 0.4539, "step": 5621 }, { "epoch": 0.5471532846715328, "grad_norm": 1.3342073178854172, "learning_rate": 4.480806355073467e-06, "loss": 0.3692, "step": 5622 }, { "epoch": 0.547250608272506, "grad_norm": 1.5354680722593141, "learning_rate": 4.479238746103792e-06, "loss": 0.2665, "step": 5623 }, { "epoch": 0.5473479318734793, "grad_norm": 1.7451242234355575, "learning_rate": 4.477671188882483e-06, "loss": 0.6151, "step": 5624 }, { "epoch": 0.5474452554744526, "grad_norm": 1.6591621564148866, "learning_rate": 4.476103683565309e-06, "loss": 0.4289, "step": 5625 }, { "epoch": 0.5475425790754258, "grad_norm": 1.594493169110207, "learning_rate": 4.474536230308036e-06, "loss": 0.5853, "step": 5626 }, { "epoch": 0.5476399026763991, "grad_norm": 1.2386168016252284, "learning_rate": 4.472968829266419e-06, "loss": 0.2942, "step": 5627 }, { "epoch": 0.5477372262773723, "grad_norm": 1.4275181054672363, "learning_rate": 4.4714014805962125e-06, "loss": 0.2697, "step": 5628 }, { "epoch": 0.5478345498783455, "grad_norm": 1.1916440632594898, "learning_rate": 4.4698341844531655e-06, "loss": 0.2913, "step": 5629 }, { "epoch": 0.5479318734793187, "grad_norm": 1.6471889884187896, "learning_rate": 4.468266940993025e-06, "loss": 0.4558, "step": 5630 }, { "epoch": 0.548029197080292, "grad_norm": 1.377839018544238, "learning_rate": 4.466699750371522e-06, "loss": 0.3219, "step": 5631 }, { "epoch": 0.5481265206812652, "grad_norm": 1.5019494739979196, "learning_rate": 4.465132612744394e-06, "loss": 0.3668, "step": 5632 }, { "epoch": 0.5482238442822385, "grad_norm": 1.3771030641989979, "learning_rate": 4.463565528267367e-06, "loss": 0.2507, "step": 5633 }, { "epoch": 0.5483211678832117, "grad_norm": 1.8499678170781386, "learning_rate": 4.4619984970961626e-06, "loss": 0.5311, "step": 5634 }, { "epoch": 0.5484184914841849, "grad_norm": 1.363181736328119, "learning_rate": 4.460431519386498e-06, "loss": 0.3994, "step": 5635 }, { "epoch": 0.5485158150851581, "grad_norm": 1.4284270520142166, "learning_rate": 4.458864595294085e-06, "loss": 0.3709, "step": 5636 }, { "epoch": 0.5486131386861314, "grad_norm": 1.2894122025064025, "learning_rate": 4.457297724974632e-06, "loss": 0.4439, "step": 5637 }, { "epoch": 0.5487104622871046, "grad_norm": 1.3245364457901871, "learning_rate": 4.4557309085838355e-06, "loss": 0.278, "step": 5638 }, { "epoch": 0.5488077858880779, "grad_norm": 1.306145681203802, "learning_rate": 4.454164146277393e-06, "loss": 0.3399, "step": 5639 }, { "epoch": 0.5489051094890511, "grad_norm": 1.641617393076927, "learning_rate": 4.452597438210996e-06, "loss": 0.5549, "step": 5640 }, { "epoch": 0.5490024330900243, "grad_norm": 1.6762169758319294, "learning_rate": 4.451030784540327e-06, "loss": 0.5193, "step": 5641 }, { "epoch": 0.5490997566909975, "grad_norm": 1.7392269005403265, "learning_rate": 4.449464185421066e-06, "loss": 0.4333, "step": 5642 }, { "epoch": 0.5491970802919708, "grad_norm": 1.223662331142104, "learning_rate": 4.4478976410088875e-06, "loss": 0.3168, "step": 5643 }, { "epoch": 0.549294403892944, "grad_norm": 1.1527568529264938, "learning_rate": 4.446331151459461e-06, "loss": 0.2816, "step": 5644 }, { "epoch": 0.5493917274939173, "grad_norm": 1.4589827408583553, "learning_rate": 4.444764716928448e-06, "loss": 0.4807, "step": 5645 }, { "epoch": 0.5494890510948905, "grad_norm": 1.3650449695096383, "learning_rate": 4.443198337571505e-06, "loss": 0.4112, "step": 5646 }, { "epoch": 0.5495863746958638, "grad_norm": 1.2069856209139433, "learning_rate": 4.4416320135442855e-06, "loss": 0.3413, "step": 5647 }, { "epoch": 0.5496836982968369, "grad_norm": 1.2385661763085989, "learning_rate": 4.440065745002438e-06, "loss": 0.2793, "step": 5648 }, { "epoch": 0.5497810218978102, "grad_norm": 1.2572932148786662, "learning_rate": 4.4384995321016e-06, "loss": 0.2908, "step": 5649 }, { "epoch": 0.5498783454987834, "grad_norm": 1.3903538300228837, "learning_rate": 4.436933374997408e-06, "loss": 0.4652, "step": 5650 }, { "epoch": 0.5499756690997567, "grad_norm": 1.2046055786935932, "learning_rate": 4.435367273845496e-06, "loss": 0.3401, "step": 5651 }, { "epoch": 0.55007299270073, "grad_norm": 1.4198268349598033, "learning_rate": 4.433801228801482e-06, "loss": 0.297, "step": 5652 }, { "epoch": 0.5501703163017032, "grad_norm": 4.186875709571215, "learning_rate": 4.432235240020988e-06, "loss": 0.373, "step": 5653 }, { "epoch": 0.5502676399026764, "grad_norm": 1.1066768232323798, "learning_rate": 4.430669307659627e-06, "loss": 0.2626, "step": 5654 }, { "epoch": 0.5503649635036496, "grad_norm": 1.6041719345732885, "learning_rate": 4.429103431873009e-06, "loss": 0.3763, "step": 5655 }, { "epoch": 0.5504622871046229, "grad_norm": 1.4456818804536653, "learning_rate": 4.427537612816732e-06, "loss": 0.486, "step": 5656 }, { "epoch": 0.5505596107055961, "grad_norm": 1.5069550759467354, "learning_rate": 4.425971850646394e-06, "loss": 0.4548, "step": 5657 }, { "epoch": 0.5506569343065694, "grad_norm": 1.5218902555793785, "learning_rate": 4.424406145517589e-06, "loss": 0.4255, "step": 5658 }, { "epoch": 0.5507542579075426, "grad_norm": 1.3616780032585183, "learning_rate": 4.422840497585896e-06, "loss": 0.2637, "step": 5659 }, { "epoch": 0.5508515815085158, "grad_norm": 1.1705809505584408, "learning_rate": 4.4212749070068974e-06, "loss": 0.2304, "step": 5660 }, { "epoch": 0.550948905109489, "grad_norm": 1.5206775411300353, "learning_rate": 4.419709373936167e-06, "loss": 0.4589, "step": 5661 }, { "epoch": 0.5510462287104623, "grad_norm": 1.529486906246868, "learning_rate": 4.418143898529276e-06, "loss": 0.5809, "step": 5662 }, { "epoch": 0.5511435523114355, "grad_norm": 1.4965602057951768, "learning_rate": 4.416578480941781e-06, "loss": 0.4344, "step": 5663 }, { "epoch": 0.5512408759124088, "grad_norm": 1.627661066713738, "learning_rate": 4.4150131213292406e-06, "loss": 0.5971, "step": 5664 }, { "epoch": 0.551338199513382, "grad_norm": 1.5204052605710328, "learning_rate": 4.4134478198472065e-06, "loss": 0.2793, "step": 5665 }, { "epoch": 0.5514355231143553, "grad_norm": 1.6608910942592672, "learning_rate": 4.411882576651224e-06, "loss": 0.4815, "step": 5666 }, { "epoch": 0.5515328467153284, "grad_norm": 1.5777835287666877, "learning_rate": 4.410317391896829e-06, "loss": 0.5266, "step": 5667 }, { "epoch": 0.5516301703163017, "grad_norm": 1.316546333624811, "learning_rate": 4.408752265739559e-06, "loss": 0.3166, "step": 5668 }, { "epoch": 0.5517274939172749, "grad_norm": 1.5371700987399954, "learning_rate": 4.407187198334941e-06, "loss": 0.4064, "step": 5669 }, { "epoch": 0.5518248175182482, "grad_norm": 1.616571428829941, "learning_rate": 4.405622189838492e-06, "loss": 0.5294, "step": 5670 }, { "epoch": 0.5519221411192214, "grad_norm": 1.7448740545910149, "learning_rate": 4.404057240405733e-06, "loss": 0.5254, "step": 5671 }, { "epoch": 0.5520194647201947, "grad_norm": 1.6516182391127119, "learning_rate": 4.4024923501921725e-06, "loss": 0.3904, "step": 5672 }, { "epoch": 0.5521167883211678, "grad_norm": 1.2729215922575123, "learning_rate": 4.400927519353316e-06, "loss": 0.3724, "step": 5673 }, { "epoch": 0.5522141119221411, "grad_norm": 1.424783017553896, "learning_rate": 4.399362748044658e-06, "loss": 0.3236, "step": 5674 }, { "epoch": 0.5523114355231143, "grad_norm": 1.4235511374310892, "learning_rate": 4.397798036421693e-06, "loss": 0.3861, "step": 5675 }, { "epoch": 0.5524087591240876, "grad_norm": 1.1966914780753308, "learning_rate": 4.3962333846399075e-06, "loss": 0.3466, "step": 5676 }, { "epoch": 0.5525060827250609, "grad_norm": 1.311271001249418, "learning_rate": 4.394668792854782e-06, "loss": 0.4016, "step": 5677 }, { "epoch": 0.5526034063260341, "grad_norm": 1.2245364616140033, "learning_rate": 4.393104261221791e-06, "loss": 0.307, "step": 5678 }, { "epoch": 0.5527007299270073, "grad_norm": 1.1602939305667588, "learning_rate": 4.391539789896401e-06, "loss": 0.2741, "step": 5679 }, { "epoch": 0.5527980535279805, "grad_norm": 1.6516764851396022, "learning_rate": 4.389975379034078e-06, "loss": 0.4003, "step": 5680 }, { "epoch": 0.5528953771289538, "grad_norm": 1.2223406009470155, "learning_rate": 4.388411028790276e-06, "loss": 0.2746, "step": 5681 }, { "epoch": 0.552992700729927, "grad_norm": 1.3704512522620642, "learning_rate": 4.386846739320445e-06, "loss": 0.4675, "step": 5682 }, { "epoch": 0.5530900243309003, "grad_norm": 1.4224145724853485, "learning_rate": 4.38528251078003e-06, "loss": 0.479, "step": 5683 }, { "epoch": 0.5531873479318735, "grad_norm": 1.4287947905854521, "learning_rate": 4.383718343324473e-06, "loss": 0.442, "step": 5684 }, { "epoch": 0.5532846715328467, "grad_norm": 1.2537548413473198, "learning_rate": 4.3821542371092e-06, "loss": 0.3575, "step": 5685 }, { "epoch": 0.5533819951338199, "grad_norm": 1.3718420524377242, "learning_rate": 4.380590192289641e-06, "loss": 0.3794, "step": 5686 }, { "epoch": 0.5534793187347932, "grad_norm": 1.3868837070406654, "learning_rate": 4.379026209021216e-06, "loss": 0.4507, "step": 5687 }, { "epoch": 0.5535766423357664, "grad_norm": 1.2436446274636699, "learning_rate": 4.377462287459338e-06, "loss": 0.3465, "step": 5688 }, { "epoch": 0.5536739659367397, "grad_norm": 1.3879155971385935, "learning_rate": 4.3758984277594135e-06, "loss": 0.4479, "step": 5689 }, { "epoch": 0.5537712895377129, "grad_norm": 1.5156770677569695, "learning_rate": 4.374334630076847e-06, "loss": 0.451, "step": 5690 }, { "epoch": 0.5538686131386862, "grad_norm": 1.382281443891787, "learning_rate": 4.372770894567033e-06, "loss": 0.3563, "step": 5691 }, { "epoch": 0.5539659367396593, "grad_norm": 1.339720873300227, "learning_rate": 4.371207221385361e-06, "loss": 0.3686, "step": 5692 }, { "epoch": 0.5540632603406326, "grad_norm": 1.3006558286211083, "learning_rate": 4.369643610687213e-06, "loss": 0.3267, "step": 5693 }, { "epoch": 0.5541605839416058, "grad_norm": 1.5130281519316149, "learning_rate": 4.368080062627967e-06, "loss": 0.5552, "step": 5694 }, { "epoch": 0.5542579075425791, "grad_norm": 1.5043682231045181, "learning_rate": 4.366516577362996e-06, "loss": 0.6171, "step": 5695 }, { "epoch": 0.5543552311435523, "grad_norm": 1.7431379385735046, "learning_rate": 4.36495315504766e-06, "loss": 0.367, "step": 5696 }, { "epoch": 0.5544525547445256, "grad_norm": 1.4365088321402324, "learning_rate": 4.363389795837319e-06, "loss": 0.3972, "step": 5697 }, { "epoch": 0.5545498783454987, "grad_norm": 1.4406249867602698, "learning_rate": 4.361826499887326e-06, "loss": 0.4694, "step": 5698 }, { "epoch": 0.554647201946472, "grad_norm": 4.114315181959679, "learning_rate": 4.360263267353026e-06, "loss": 0.5357, "step": 5699 }, { "epoch": 0.5547445255474452, "grad_norm": 1.5396913899178413, "learning_rate": 4.358700098389757e-06, "loss": 0.5587, "step": 5700 }, { "epoch": 0.5548418491484185, "grad_norm": 1.373870776059156, "learning_rate": 4.357136993152854e-06, "loss": 0.2794, "step": 5701 }, { "epoch": 0.5549391727493918, "grad_norm": 1.1133884846757365, "learning_rate": 4.3555739517976445e-06, "loss": 0.2657, "step": 5702 }, { "epoch": 0.555036496350365, "grad_norm": 1.5146218352253658, "learning_rate": 4.3540109744794464e-06, "loss": 0.4651, "step": 5703 }, { "epoch": 0.5551338199513381, "grad_norm": 1.4914633706505942, "learning_rate": 4.352448061353574e-06, "loss": 0.3688, "step": 5704 }, { "epoch": 0.5552311435523114, "grad_norm": 1.1492070325180173, "learning_rate": 4.350885212575339e-06, "loss": 0.3122, "step": 5705 }, { "epoch": 0.5553284671532847, "grad_norm": 1.6119735627290095, "learning_rate": 4.3493224283000365e-06, "loss": 0.4979, "step": 5706 }, { "epoch": 0.5554257907542579, "grad_norm": 1.6176815291929274, "learning_rate": 4.3477597086829644e-06, "loss": 0.4076, "step": 5707 }, { "epoch": 0.5555231143552312, "grad_norm": 1.541033726575006, "learning_rate": 4.346197053879411e-06, "loss": 0.5452, "step": 5708 }, { "epoch": 0.5556204379562044, "grad_norm": 1.6140867720751837, "learning_rate": 4.344634464044659e-06, "loss": 0.5637, "step": 5709 }, { "epoch": 0.5557177615571777, "grad_norm": 1.372663767342997, "learning_rate": 4.3430719393339825e-06, "loss": 0.2497, "step": 5710 }, { "epoch": 0.5558150851581508, "grad_norm": 1.6676721242118582, "learning_rate": 4.341509479902652e-06, "loss": 0.3345, "step": 5711 }, { "epoch": 0.5559124087591241, "grad_norm": 1.5318986725427477, "learning_rate": 4.339947085905928e-06, "loss": 0.4867, "step": 5712 }, { "epoch": 0.5560097323600973, "grad_norm": 1.4807254296988144, "learning_rate": 4.338384757499069e-06, "loss": 0.4826, "step": 5713 }, { "epoch": 0.5561070559610706, "grad_norm": 1.3413221916117712, "learning_rate": 4.336822494837322e-06, "loss": 0.2589, "step": 5714 }, { "epoch": 0.5562043795620438, "grad_norm": 1.3675349638686607, "learning_rate": 4.335260298075932e-06, "loss": 0.476, "step": 5715 }, { "epoch": 0.5563017031630171, "grad_norm": 1.6791691889716327, "learning_rate": 4.333698167370136e-06, "loss": 0.4672, "step": 5716 }, { "epoch": 0.5563990267639902, "grad_norm": 1.7601989822178437, "learning_rate": 4.3321361028751615e-06, "loss": 0.4119, "step": 5717 }, { "epoch": 0.5564963503649635, "grad_norm": 1.5395900383761547, "learning_rate": 4.330574104746232e-06, "loss": 0.3063, "step": 5718 }, { "epoch": 0.5565936739659367, "grad_norm": 1.6715153102236342, "learning_rate": 4.329012173138565e-06, "loss": 0.5143, "step": 5719 }, { "epoch": 0.55669099756691, "grad_norm": 1.4393361016871615, "learning_rate": 4.327450308207373e-06, "loss": 0.4984, "step": 5720 }, { "epoch": 0.5567883211678832, "grad_norm": 1.4389287234077905, "learning_rate": 4.3258885101078565e-06, "loss": 0.36, "step": 5721 }, { "epoch": 0.5568856447688565, "grad_norm": 1.451516522520128, "learning_rate": 4.324326778995212e-06, "loss": 0.3245, "step": 5722 }, { "epoch": 0.5569829683698296, "grad_norm": 1.5139970983782318, "learning_rate": 4.322765115024633e-06, "loss": 0.4673, "step": 5723 }, { "epoch": 0.5570802919708029, "grad_norm": 1.6551885982590882, "learning_rate": 4.321203518351298e-06, "loss": 0.5166, "step": 5724 }, { "epoch": 0.5571776155717761, "grad_norm": 1.293506061205028, "learning_rate": 4.319641989130388e-06, "loss": 0.3144, "step": 5725 }, { "epoch": 0.5572749391727494, "grad_norm": 1.480498703625906, "learning_rate": 4.318080527517071e-06, "loss": 0.4621, "step": 5726 }, { "epoch": 0.5573722627737226, "grad_norm": 1.802412969469596, "learning_rate": 4.316519133666513e-06, "loss": 0.5655, "step": 5727 }, { "epoch": 0.5574695863746959, "grad_norm": 1.4758025136034716, "learning_rate": 4.314957807733867e-06, "loss": 0.4271, "step": 5728 }, { "epoch": 0.557566909975669, "grad_norm": 1.3462634864634564, "learning_rate": 4.313396549874284e-06, "loss": 0.3833, "step": 5729 }, { "epoch": 0.5576642335766423, "grad_norm": 1.4167591326587325, "learning_rate": 4.311835360242908e-06, "loss": 0.5547, "step": 5730 }, { "epoch": 0.5577615571776156, "grad_norm": 1.4905562346969488, "learning_rate": 4.310274238994879e-06, "loss": 0.3975, "step": 5731 }, { "epoch": 0.5578588807785888, "grad_norm": 1.35949627565503, "learning_rate": 4.30871318628532e-06, "loss": 0.4013, "step": 5732 }, { "epoch": 0.5579562043795621, "grad_norm": 1.475535363492599, "learning_rate": 4.307152202269356e-06, "loss": 0.3366, "step": 5733 }, { "epoch": 0.5580535279805353, "grad_norm": 1.409351827426732, "learning_rate": 4.305591287102105e-06, "loss": 0.4553, "step": 5734 }, { "epoch": 0.5581508515815086, "grad_norm": 1.5223924231145742, "learning_rate": 4.3040304409386735e-06, "loss": 0.389, "step": 5735 }, { "epoch": 0.5582481751824817, "grad_norm": 1.5817444341183888, "learning_rate": 4.302469663934164e-06, "loss": 0.544, "step": 5736 }, { "epoch": 0.558345498783455, "grad_norm": 1.2791959533486645, "learning_rate": 4.300908956243674e-06, "loss": 0.1936, "step": 5737 }, { "epoch": 0.5584428223844282, "grad_norm": 1.6055462672321668, "learning_rate": 4.299348318022293e-06, "loss": 0.4708, "step": 5738 }, { "epoch": 0.5585401459854015, "grad_norm": 2.723561556469742, "learning_rate": 4.297787749425096e-06, "loss": 0.4138, "step": 5739 }, { "epoch": 0.5586374695863747, "grad_norm": 1.5183098206163341, "learning_rate": 4.296227250607163e-06, "loss": 0.4572, "step": 5740 }, { "epoch": 0.558734793187348, "grad_norm": 1.3705952925621963, "learning_rate": 4.294666821723564e-06, "loss": 0.2, "step": 5741 }, { "epoch": 0.5588321167883211, "grad_norm": 1.3938022278720688, "learning_rate": 4.293106462929353e-06, "loss": 0.4189, "step": 5742 }, { "epoch": 0.5589294403892944, "grad_norm": 1.573159138093502, "learning_rate": 4.291546174379588e-06, "loss": 0.4792, "step": 5743 }, { "epoch": 0.5590267639902676, "grad_norm": 1.5477733122965147, "learning_rate": 4.289985956229315e-06, "loss": 0.381, "step": 5744 }, { "epoch": 0.5591240875912409, "grad_norm": 1.604271658231785, "learning_rate": 4.2884258086335755e-06, "loss": 0.3831, "step": 5745 }, { "epoch": 0.5592214111922141, "grad_norm": 1.7620343181545848, "learning_rate": 4.2868657317474e-06, "loss": 0.4775, "step": 5746 }, { "epoch": 0.5593187347931874, "grad_norm": 1.6098626148059358, "learning_rate": 4.285305725725814e-06, "loss": 0.6441, "step": 5747 }, { "epoch": 0.5594160583941605, "grad_norm": 2.5639184886931563, "learning_rate": 4.283745790723837e-06, "loss": 0.3698, "step": 5748 }, { "epoch": 0.5595133819951338, "grad_norm": 1.8228770515133712, "learning_rate": 4.282185926896483e-06, "loss": 0.7033, "step": 5749 }, { "epoch": 0.559610705596107, "grad_norm": 1.592250193071945, "learning_rate": 4.280626134398753e-06, "loss": 0.3942, "step": 5750 }, { "epoch": 0.5597080291970803, "grad_norm": 1.349043245723801, "learning_rate": 4.279066413385646e-06, "loss": 0.307, "step": 5751 }, { "epoch": 0.5598053527980535, "grad_norm": 1.371459433976103, "learning_rate": 4.2775067640121554e-06, "loss": 0.345, "step": 5752 }, { "epoch": 0.5599026763990268, "grad_norm": 1.4470123192896125, "learning_rate": 4.27594718643326e-06, "loss": 0.4186, "step": 5753 }, { "epoch": 0.56, "grad_norm": 1.3477849484233957, "learning_rate": 4.274387680803936e-06, "loss": 0.3387, "step": 5754 }, { "epoch": 0.5600973236009732, "grad_norm": 1.4259890888219338, "learning_rate": 4.272828247279156e-06, "loss": 0.4646, "step": 5755 }, { "epoch": 0.5601946472019464, "grad_norm": 1.1946278524912584, "learning_rate": 4.27126888601388e-06, "loss": 0.367, "step": 5756 }, { "epoch": 0.5602919708029197, "grad_norm": 1.5421013434092867, "learning_rate": 4.269709597163062e-06, "loss": 0.4504, "step": 5757 }, { "epoch": 0.560389294403893, "grad_norm": 1.9498639157078792, "learning_rate": 4.26815038088165e-06, "loss": 0.4335, "step": 5758 }, { "epoch": 0.5604866180048662, "grad_norm": 2.0494519966647915, "learning_rate": 4.2665912373245875e-06, "loss": 0.5742, "step": 5759 }, { "epoch": 0.5605839416058395, "grad_norm": 1.5747561614186576, "learning_rate": 4.265032166646801e-06, "loss": 0.3735, "step": 5760 }, { "epoch": 0.5606812652068126, "grad_norm": 1.843850149247229, "learning_rate": 4.26347316900322e-06, "loss": 0.3419, "step": 5761 }, { "epoch": 0.5607785888077859, "grad_norm": 1.4477279686493996, "learning_rate": 4.261914244548764e-06, "loss": 0.332, "step": 5762 }, { "epoch": 0.5608759124087591, "grad_norm": 1.6852735737655822, "learning_rate": 4.260355393438345e-06, "loss": 0.4122, "step": 5763 }, { "epoch": 0.5609732360097324, "grad_norm": 1.4724437448995873, "learning_rate": 4.2587966158268624e-06, "loss": 0.2869, "step": 5764 }, { "epoch": 0.5610705596107056, "grad_norm": 1.640785790693152, "learning_rate": 4.2572379118692155e-06, "loss": 0.5461, "step": 5765 }, { "epoch": 0.5611678832116789, "grad_norm": 1.3247785707104316, "learning_rate": 4.255679281720295e-06, "loss": 0.2946, "step": 5766 }, { "epoch": 0.561265206812652, "grad_norm": 1.3947349848303983, "learning_rate": 4.254120725534983e-06, "loss": 0.3114, "step": 5767 }, { "epoch": 0.5613625304136253, "grad_norm": 1.4054907974625854, "learning_rate": 4.25256224346815e-06, "loss": 0.4066, "step": 5768 }, { "epoch": 0.5614598540145985, "grad_norm": 1.539561776443452, "learning_rate": 4.251003835674668e-06, "loss": 0.3611, "step": 5769 }, { "epoch": 0.5615571776155718, "grad_norm": 1.4914655286978868, "learning_rate": 4.249445502309395e-06, "loss": 0.5131, "step": 5770 }, { "epoch": 0.561654501216545, "grad_norm": 1.519511166319719, "learning_rate": 4.247887243527184e-06, "loss": 0.5641, "step": 5771 }, { "epoch": 0.5617518248175183, "grad_norm": 1.8221036424020411, "learning_rate": 4.246329059482879e-06, "loss": 0.5552, "step": 5772 }, { "epoch": 0.5618491484184915, "grad_norm": 1.6851180891545114, "learning_rate": 4.24477095033132e-06, "loss": 0.3155, "step": 5773 }, { "epoch": 0.5619464720194647, "grad_norm": 1.3545287112899682, "learning_rate": 4.243212916227336e-06, "loss": 0.3855, "step": 5774 }, { "epoch": 0.5620437956204379, "grad_norm": 1.58335034043397, "learning_rate": 4.241654957325749e-06, "loss": 0.4907, "step": 5775 }, { "epoch": 0.5621411192214112, "grad_norm": 1.5281306822501477, "learning_rate": 4.240097073781374e-06, "loss": 0.5293, "step": 5776 }, { "epoch": 0.5622384428223844, "grad_norm": 1.4214115435503911, "learning_rate": 4.238539265749022e-06, "loss": 0.4531, "step": 5777 }, { "epoch": 0.5623357664233577, "grad_norm": 1.3606956468499203, "learning_rate": 4.236981533383489e-06, "loss": 0.3181, "step": 5778 }, { "epoch": 0.562433090024331, "grad_norm": 1.4931280557146895, "learning_rate": 4.2354238768395705e-06, "loss": 0.4433, "step": 5779 }, { "epoch": 0.5625304136253041, "grad_norm": 1.5924777768678957, "learning_rate": 4.233866296272052e-06, "loss": 0.2889, "step": 5780 }, { "epoch": 0.5626277372262773, "grad_norm": 1.3373551013926965, "learning_rate": 4.23230879183571e-06, "loss": 0.339, "step": 5781 }, { "epoch": 0.5627250608272506, "grad_norm": 1.3025930805591237, "learning_rate": 4.230751363685316e-06, "loss": 0.3216, "step": 5782 }, { "epoch": 0.5628223844282239, "grad_norm": 1.301444677764997, "learning_rate": 4.22919401197563e-06, "loss": 0.3536, "step": 5783 }, { "epoch": 0.5629197080291971, "grad_norm": 1.3653717626770583, "learning_rate": 4.22763673686141e-06, "loss": 0.3509, "step": 5784 }, { "epoch": 0.5630170316301704, "grad_norm": 1.183773971282679, "learning_rate": 4.226079538497404e-06, "loss": 0.2994, "step": 5785 }, { "epoch": 0.5631143552311435, "grad_norm": 1.1602299214100964, "learning_rate": 4.224522417038348e-06, "loss": 0.337, "step": 5786 }, { "epoch": 0.5632116788321168, "grad_norm": 2.2904940870900288, "learning_rate": 4.2229653726389765e-06, "loss": 0.4567, "step": 5787 }, { "epoch": 0.56330900243309, "grad_norm": 1.487082411549415, "learning_rate": 4.221408405454014e-06, "loss": 0.3703, "step": 5788 }, { "epoch": 0.5634063260340633, "grad_norm": 1.257244811281916, "learning_rate": 4.219851515638175e-06, "loss": 0.3922, "step": 5789 }, { "epoch": 0.5635036496350365, "grad_norm": 1.6782680457611208, "learning_rate": 4.218294703346171e-06, "loss": 0.5096, "step": 5790 }, { "epoch": 0.5636009732360098, "grad_norm": 1.4268072747672031, "learning_rate": 4.216737968732703e-06, "loss": 0.4082, "step": 5791 }, { "epoch": 0.5636982968369829, "grad_norm": 1.611062507700133, "learning_rate": 4.215181311952466e-06, "loss": 0.4716, "step": 5792 }, { "epoch": 0.5637956204379562, "grad_norm": 1.41265833144514, "learning_rate": 4.213624733160143e-06, "loss": 0.3368, "step": 5793 }, { "epoch": 0.5638929440389294, "grad_norm": 1.5428232631419951, "learning_rate": 4.212068232510413e-06, "loss": 0.3018, "step": 5794 }, { "epoch": 0.5639902676399027, "grad_norm": 1.373505560498114, "learning_rate": 4.2105118101579505e-06, "loss": 0.176, "step": 5795 }, { "epoch": 0.5640875912408759, "grad_norm": 1.2307753408032756, "learning_rate": 4.2089554662574115e-06, "loss": 0.201, "step": 5796 }, { "epoch": 0.5641849148418492, "grad_norm": 1.587634977474687, "learning_rate": 4.207399200963454e-06, "loss": 0.5812, "step": 5797 }, { "epoch": 0.5642822384428224, "grad_norm": 1.2387652008017036, "learning_rate": 4.205843014430724e-06, "loss": 0.2995, "step": 5798 }, { "epoch": 0.5643795620437956, "grad_norm": 1.6778596380542672, "learning_rate": 4.204286906813865e-06, "loss": 0.3894, "step": 5799 }, { "epoch": 0.5644768856447688, "grad_norm": 1.2434548684379014, "learning_rate": 4.202730878267503e-06, "loss": 0.3497, "step": 5800 }, { "epoch": 0.5645742092457421, "grad_norm": 1.7892108172776426, "learning_rate": 4.201174928946265e-06, "loss": 0.5834, "step": 5801 }, { "epoch": 0.5646715328467153, "grad_norm": 1.5107863752021933, "learning_rate": 4.199619059004764e-06, "loss": 0.4172, "step": 5802 }, { "epoch": 0.5647688564476886, "grad_norm": 1.670276055504903, "learning_rate": 4.19806326859761e-06, "loss": 0.5403, "step": 5803 }, { "epoch": 0.5648661800486618, "grad_norm": 1.6019603700248415, "learning_rate": 4.196507557879401e-06, "loss": 0.461, "step": 5804 }, { "epoch": 0.564963503649635, "grad_norm": 1.8552966871289693, "learning_rate": 4.19495192700473e-06, "loss": 0.308, "step": 5805 }, { "epoch": 0.5650608272506082, "grad_norm": 1.2212880434574651, "learning_rate": 4.193396376128183e-06, "loss": 0.3277, "step": 5806 }, { "epoch": 0.5651581508515815, "grad_norm": 1.3939012659208478, "learning_rate": 4.191840905404332e-06, "loss": 0.4034, "step": 5807 }, { "epoch": 0.5652554744525548, "grad_norm": 1.5378765048385783, "learning_rate": 4.190285514987746e-06, "loss": 0.4876, "step": 5808 }, { "epoch": 0.565352798053528, "grad_norm": 1.3633671162406111, "learning_rate": 4.1887302050329864e-06, "loss": 0.3709, "step": 5809 }, { "epoch": 0.5654501216545013, "grad_norm": 1.564768784356169, "learning_rate": 4.1871749756946075e-06, "loss": 0.4103, "step": 5810 }, { "epoch": 0.5655474452554744, "grad_norm": 1.724405105952962, "learning_rate": 4.185619827127148e-06, "loss": 0.4854, "step": 5811 }, { "epoch": 0.5656447688564477, "grad_norm": 1.622295917436671, "learning_rate": 4.184064759485148e-06, "loss": 0.4564, "step": 5812 }, { "epoch": 0.5657420924574209, "grad_norm": 1.5517152524744877, "learning_rate": 4.182509772923134e-06, "loss": 0.5225, "step": 5813 }, { "epoch": 0.5658394160583942, "grad_norm": 1.4068333581415946, "learning_rate": 4.180954867595628e-06, "loss": 0.4325, "step": 5814 }, { "epoch": 0.5659367396593674, "grad_norm": 1.6735239186488111, "learning_rate": 4.179400043657138e-06, "loss": 0.3124, "step": 5815 }, { "epoch": 0.5660340632603407, "grad_norm": 1.5777685688239573, "learning_rate": 4.17784530126217e-06, "loss": 0.5863, "step": 5816 }, { "epoch": 0.5661313868613139, "grad_norm": 1.5423998592768402, "learning_rate": 4.176290640565223e-06, "loss": 0.3238, "step": 5817 }, { "epoch": 0.5662287104622871, "grad_norm": 1.2853432377266125, "learning_rate": 4.174736061720778e-06, "loss": 0.3231, "step": 5818 }, { "epoch": 0.5663260340632603, "grad_norm": 1.3506108279634295, "learning_rate": 4.173181564883318e-06, "loss": 0.3853, "step": 5819 }, { "epoch": 0.5664233576642336, "grad_norm": 1.4787948123078365, "learning_rate": 4.171627150207314e-06, "loss": 0.4626, "step": 5820 }, { "epoch": 0.5665206812652068, "grad_norm": 1.2974132661734263, "learning_rate": 4.170072817847232e-06, "loss": 0.3034, "step": 5821 }, { "epoch": 0.5666180048661801, "grad_norm": 1.6572259151028126, "learning_rate": 4.1685185679575226e-06, "loss": 0.4438, "step": 5822 }, { "epoch": 0.5667153284671533, "grad_norm": 1.47164631421599, "learning_rate": 4.166964400692633e-06, "loss": 0.4198, "step": 5823 }, { "epoch": 0.5668126520681265, "grad_norm": 1.7131371141330365, "learning_rate": 4.165410316207004e-06, "loss": 0.7682, "step": 5824 }, { "epoch": 0.5669099756690997, "grad_norm": 1.2962205453467226, "learning_rate": 4.1638563146550646e-06, "loss": 0.324, "step": 5825 }, { "epoch": 0.567007299270073, "grad_norm": 1.4390408094927323, "learning_rate": 4.162302396191237e-06, "loss": 0.4436, "step": 5826 }, { "epoch": 0.5671046228710462, "grad_norm": 1.3518474476934619, "learning_rate": 4.160748560969935e-06, "loss": 0.3033, "step": 5827 }, { "epoch": 0.5672019464720195, "grad_norm": 4.787110968736985, "learning_rate": 4.159194809145567e-06, "loss": 0.2463, "step": 5828 }, { "epoch": 0.5672992700729927, "grad_norm": 1.525896562397849, "learning_rate": 4.157641140872524e-06, "loss": 0.5168, "step": 5829 }, { "epoch": 0.5673965936739659, "grad_norm": 1.6175950868182933, "learning_rate": 4.1560875563052e-06, "loss": 0.617, "step": 5830 }, { "epoch": 0.5674939172749391, "grad_norm": 1.1931895932856638, "learning_rate": 4.154534055597973e-06, "loss": 0.2237, "step": 5831 }, { "epoch": 0.5675912408759124, "grad_norm": 1.448923661863635, "learning_rate": 4.15298063890522e-06, "loss": 0.4892, "step": 5832 }, { "epoch": 0.5676885644768856, "grad_norm": 1.617135751974373, "learning_rate": 4.151427306381298e-06, "loss": 0.3665, "step": 5833 }, { "epoch": 0.5677858880778589, "grad_norm": 1.4221255741874326, "learning_rate": 4.1498740581805675e-06, "loss": 0.4273, "step": 5834 }, { "epoch": 0.5678832116788322, "grad_norm": 1.0648209522518866, "learning_rate": 4.148320894457375e-06, "loss": 0.2857, "step": 5835 }, { "epoch": 0.5679805352798053, "grad_norm": 1.4399789913287415, "learning_rate": 4.146767815366058e-06, "loss": 0.4965, "step": 5836 }, { "epoch": 0.5680778588807786, "grad_norm": 1.4435146387650877, "learning_rate": 4.1452148210609466e-06, "loss": 0.4407, "step": 5837 }, { "epoch": 0.5681751824817518, "grad_norm": 1.327625898939338, "learning_rate": 4.143661911696365e-06, "loss": 0.3226, "step": 5838 }, { "epoch": 0.5682725060827251, "grad_norm": 1.4223642031676123, "learning_rate": 4.142109087426625e-06, "loss": 0.3701, "step": 5839 }, { "epoch": 0.5683698296836983, "grad_norm": 1.4250017935758312, "learning_rate": 4.140556348406033e-06, "loss": 0.4687, "step": 5840 }, { "epoch": 0.5684671532846716, "grad_norm": 3.7430897940959205, "learning_rate": 4.139003694788885e-06, "loss": 0.3839, "step": 5841 }, { "epoch": 0.5685644768856448, "grad_norm": 1.5338848566461847, "learning_rate": 4.13745112672947e-06, "loss": 0.4931, "step": 5842 }, { "epoch": 0.568661800486618, "grad_norm": 1.7129285540671277, "learning_rate": 4.135898644382065e-06, "loss": 0.3602, "step": 5843 }, { "epoch": 0.5687591240875912, "grad_norm": 1.5385589550427017, "learning_rate": 4.1343462479009425e-06, "loss": 0.3521, "step": 5844 }, { "epoch": 0.5688564476885645, "grad_norm": 1.3906904055183722, "learning_rate": 4.132793937440366e-06, "loss": 0.5322, "step": 5845 }, { "epoch": 0.5689537712895377, "grad_norm": 1.5661330139359453, "learning_rate": 4.13124171315459e-06, "loss": 0.443, "step": 5846 }, { "epoch": 0.569051094890511, "grad_norm": 1.5404268210814747, "learning_rate": 4.129689575197857e-06, "loss": 0.308, "step": 5847 }, { "epoch": 0.5691484184914842, "grad_norm": 1.274700451238236, "learning_rate": 4.128137523724407e-06, "loss": 0.3413, "step": 5848 }, { "epoch": 0.5692457420924574, "grad_norm": 1.5479710866706966, "learning_rate": 4.126585558888466e-06, "loss": 0.6331, "step": 5849 }, { "epoch": 0.5693430656934306, "grad_norm": 1.3218775395217923, "learning_rate": 4.125033680844257e-06, "loss": 0.2476, "step": 5850 }, { "epoch": 0.5694403892944039, "grad_norm": 1.6071205273848088, "learning_rate": 4.123481889745987e-06, "loss": 0.3284, "step": 5851 }, { "epoch": 0.5695377128953771, "grad_norm": 1.93464499516687, "learning_rate": 4.1219301857478615e-06, "loss": 0.4431, "step": 5852 }, { "epoch": 0.5696350364963504, "grad_norm": 1.4423423146713994, "learning_rate": 4.120378569004074e-06, "loss": 0.3544, "step": 5853 }, { "epoch": 0.5697323600973236, "grad_norm": 1.5059937888393515, "learning_rate": 4.118827039668808e-06, "loss": 0.4748, "step": 5854 }, { "epoch": 0.5698296836982968, "grad_norm": 1.5952685732122698, "learning_rate": 4.1172755978962395e-06, "loss": 0.5302, "step": 5855 }, { "epoch": 0.56992700729927, "grad_norm": 1.8488532094666221, "learning_rate": 4.115724243840537e-06, "loss": 0.4407, "step": 5856 }, { "epoch": 0.5700243309002433, "grad_norm": 1.3853091530444268, "learning_rate": 4.114172977655863e-06, "loss": 0.2285, "step": 5857 }, { "epoch": 0.5701216545012165, "grad_norm": 1.6154002020331932, "learning_rate": 4.112621799496362e-06, "loss": 0.3314, "step": 5858 }, { "epoch": 0.5702189781021898, "grad_norm": 1.5521941006794486, "learning_rate": 4.111070709516178e-06, "loss": 0.5284, "step": 5859 }, { "epoch": 0.570316301703163, "grad_norm": 1.2797231182502546, "learning_rate": 4.109519707869447e-06, "loss": 0.2959, "step": 5860 }, { "epoch": 0.5704136253041363, "grad_norm": 2.052441780296125, "learning_rate": 4.107968794710287e-06, "loss": 0.4323, "step": 5861 }, { "epoch": 0.5705109489051094, "grad_norm": 1.573755693129952, "learning_rate": 4.106417970192817e-06, "loss": 0.3962, "step": 5862 }, { "epoch": 0.5706082725060827, "grad_norm": 1.4515060101738473, "learning_rate": 4.1048672344711416e-06, "loss": 0.306, "step": 5863 }, { "epoch": 0.570705596107056, "grad_norm": 2.3333979552266806, "learning_rate": 4.103316587699362e-06, "loss": 0.5012, "step": 5864 }, { "epoch": 0.5708029197080292, "grad_norm": 1.3698113250288773, "learning_rate": 4.101766030031562e-06, "loss": 0.2574, "step": 5865 }, { "epoch": 0.5709002433090025, "grad_norm": 1.5962661627636017, "learning_rate": 4.100215561621824e-06, "loss": 0.5743, "step": 5866 }, { "epoch": 0.5709975669099757, "grad_norm": 1.1767731091783282, "learning_rate": 4.098665182624219e-06, "loss": 0.2687, "step": 5867 }, { "epoch": 0.5710948905109489, "grad_norm": 1.481903266910654, "learning_rate": 4.09711489319281e-06, "loss": 0.371, "step": 5868 }, { "epoch": 0.5711922141119221, "grad_norm": 3.0869616390473187, "learning_rate": 4.095564693481647e-06, "loss": 0.4937, "step": 5869 }, { "epoch": 0.5712895377128954, "grad_norm": 1.4882391976250102, "learning_rate": 4.094014583644776e-06, "loss": 0.4624, "step": 5870 }, { "epoch": 0.5713868613138686, "grad_norm": 1.5597033076707199, "learning_rate": 4.092464563836235e-06, "loss": 0.2965, "step": 5871 }, { "epoch": 0.5714841849148419, "grad_norm": 1.4837817160889117, "learning_rate": 4.090914634210047e-06, "loss": 0.4667, "step": 5872 }, { "epoch": 0.5715815085158151, "grad_norm": 1.3993305785679218, "learning_rate": 4.0893647949202295e-06, "loss": 0.2788, "step": 5873 }, { "epoch": 0.5716788321167883, "grad_norm": 1.6431800927654712, "learning_rate": 4.087815046120793e-06, "loss": 0.4118, "step": 5874 }, { "epoch": 0.5717761557177615, "grad_norm": 1.5376803780790709, "learning_rate": 4.086265387965738e-06, "loss": 0.379, "step": 5875 }, { "epoch": 0.5718734793187348, "grad_norm": 1.2067547824117613, "learning_rate": 4.08471582060905e-06, "loss": 0.2887, "step": 5876 }, { "epoch": 0.571970802919708, "grad_norm": 1.6017665826050145, "learning_rate": 4.083166344204714e-06, "loss": 0.2924, "step": 5877 }, { "epoch": 0.5720681265206813, "grad_norm": 1.3717343825307584, "learning_rate": 4.081616958906704e-06, "loss": 0.3096, "step": 5878 }, { "epoch": 0.5721654501216545, "grad_norm": 1.5016467274235754, "learning_rate": 4.0800676648689784e-06, "loss": 0.4965, "step": 5879 }, { "epoch": 0.5722627737226277, "grad_norm": 1.5870899842392396, "learning_rate": 4.078518462245496e-06, "loss": 0.5304, "step": 5880 }, { "epoch": 0.5723600973236009, "grad_norm": 1.6020729554050612, "learning_rate": 4.076969351190199e-06, "loss": 0.4312, "step": 5881 }, { "epoch": 0.5724574209245742, "grad_norm": 1.802059687935232, "learning_rate": 4.075420331857027e-06, "loss": 0.5438, "step": 5882 }, { "epoch": 0.5725547445255474, "grad_norm": 1.4712870174602888, "learning_rate": 4.073871404399904e-06, "loss": 0.2845, "step": 5883 }, { "epoch": 0.5726520681265207, "grad_norm": 1.6255044887625074, "learning_rate": 4.072322568972748e-06, "loss": 0.5035, "step": 5884 }, { "epoch": 0.572749391727494, "grad_norm": 1.3042007726205176, "learning_rate": 4.0707738257294685e-06, "loss": 0.3706, "step": 5885 }, { "epoch": 0.5728467153284672, "grad_norm": 1.4403083569133328, "learning_rate": 4.069225174823968e-06, "loss": 0.4408, "step": 5886 }, { "epoch": 0.5729440389294403, "grad_norm": 1.4172122503136095, "learning_rate": 4.067676616410131e-06, "loss": 0.3956, "step": 5887 }, { "epoch": 0.5730413625304136, "grad_norm": 1.570813454958392, "learning_rate": 4.0661281506418415e-06, "loss": 0.3844, "step": 5888 }, { "epoch": 0.5731386861313869, "grad_norm": 2.69485000844076, "learning_rate": 4.064579777672974e-06, "loss": 0.5282, "step": 5889 }, { "epoch": 0.5732360097323601, "grad_norm": 2.729473669436288, "learning_rate": 4.0630314976573875e-06, "loss": 0.3562, "step": 5890 }, { "epoch": 0.5733333333333334, "grad_norm": 1.4667635531646994, "learning_rate": 4.061483310748936e-06, "loss": 0.4355, "step": 5891 }, { "epoch": 0.5734306569343066, "grad_norm": 1.640858224912779, "learning_rate": 4.059935217101466e-06, "loss": 0.2991, "step": 5892 }, { "epoch": 0.5735279805352798, "grad_norm": 1.6782606585064863, "learning_rate": 4.058387216868812e-06, "loss": 0.5343, "step": 5893 }, { "epoch": 0.573625304136253, "grad_norm": 1.3295772466437539, "learning_rate": 4.056839310204798e-06, "loss": 0.3044, "step": 5894 }, { "epoch": 0.5737226277372263, "grad_norm": 1.5424168625843726, "learning_rate": 4.055291497263241e-06, "loss": 0.3207, "step": 5895 }, { "epoch": 0.5738199513381995, "grad_norm": 1.5329511107321385, "learning_rate": 4.053743778197951e-06, "loss": 0.4069, "step": 5896 }, { "epoch": 0.5739172749391728, "grad_norm": 1.2981917808667458, "learning_rate": 4.052196153162721e-06, "loss": 0.3858, "step": 5897 }, { "epoch": 0.574014598540146, "grad_norm": 1.4777369596744594, "learning_rate": 4.0506486223113416e-06, "loss": 0.3397, "step": 5898 }, { "epoch": 0.5741119221411192, "grad_norm": 1.4437053095518653, "learning_rate": 4.049101185797592e-06, "loss": 0.2613, "step": 5899 }, { "epoch": 0.5742092457420924, "grad_norm": 1.440015250893471, "learning_rate": 4.047553843775245e-06, "loss": 0.3958, "step": 5900 }, { "epoch": 0.5743065693430657, "grad_norm": 1.661304462765742, "learning_rate": 4.046006596398055e-06, "loss": 0.4794, "step": 5901 }, { "epoch": 0.5744038929440389, "grad_norm": 1.4906714762034023, "learning_rate": 4.044459443819777e-06, "loss": 0.4079, "step": 5902 }, { "epoch": 0.5745012165450122, "grad_norm": 1.7062130152009058, "learning_rate": 4.042912386194151e-06, "loss": 0.2706, "step": 5903 }, { "epoch": 0.5745985401459854, "grad_norm": 1.497257427508622, "learning_rate": 4.04136542367491e-06, "loss": 0.2953, "step": 5904 }, { "epoch": 0.5746958637469587, "grad_norm": 1.5618541145531117, "learning_rate": 4.039818556415775e-06, "loss": 0.2384, "step": 5905 }, { "epoch": 0.5747931873479318, "grad_norm": 1.4904780157464865, "learning_rate": 4.038271784570461e-06, "loss": 0.4474, "step": 5906 }, { "epoch": 0.5748905109489051, "grad_norm": 1.5448198634442605, "learning_rate": 4.036725108292673e-06, "loss": 0.2991, "step": 5907 }, { "epoch": 0.5749878345498783, "grad_norm": 1.0511404224851633, "learning_rate": 4.035178527736099e-06, "loss": 0.2233, "step": 5908 }, { "epoch": 0.5750851581508516, "grad_norm": 1.9371543083664262, "learning_rate": 4.033632043054429e-06, "loss": 0.4486, "step": 5909 }, { "epoch": 0.5751824817518248, "grad_norm": 1.297375410121607, "learning_rate": 4.032085654401337e-06, "loss": 0.4129, "step": 5910 }, { "epoch": 0.5752798053527981, "grad_norm": 2.079135046994335, "learning_rate": 4.030539361930491e-06, "loss": 0.3684, "step": 5911 }, { "epoch": 0.5753771289537712, "grad_norm": 1.0717900790281243, "learning_rate": 4.028993165795541e-06, "loss": 0.2739, "step": 5912 }, { "epoch": 0.5754744525547445, "grad_norm": 1.2387767967459138, "learning_rate": 4.027447066150138e-06, "loss": 0.3119, "step": 5913 }, { "epoch": 0.5755717761557178, "grad_norm": 1.3658545399373896, "learning_rate": 4.02590106314792e-06, "loss": 0.4284, "step": 5914 }, { "epoch": 0.575669099756691, "grad_norm": 1.4542222341215179, "learning_rate": 4.02435515694251e-06, "loss": 0.3987, "step": 5915 }, { "epoch": 0.5757664233576643, "grad_norm": 1.356249933198848, "learning_rate": 4.022809347687527e-06, "loss": 0.3397, "step": 5916 }, { "epoch": 0.5758637469586375, "grad_norm": 1.3049778415362594, "learning_rate": 4.021263635536581e-06, "loss": 0.2848, "step": 5917 }, { "epoch": 0.5759610705596107, "grad_norm": 1.331455273987577, "learning_rate": 4.019718020643269e-06, "loss": 0.3336, "step": 5918 }, { "epoch": 0.5760583941605839, "grad_norm": 1.307637289290154, "learning_rate": 4.018172503161179e-06, "loss": 0.3612, "step": 5919 }, { "epoch": 0.5761557177615572, "grad_norm": 1.7867104340406879, "learning_rate": 4.016627083243891e-06, "loss": 0.2943, "step": 5920 }, { "epoch": 0.5762530413625304, "grad_norm": 1.360497329252258, "learning_rate": 4.015081761044975e-06, "loss": 0.3606, "step": 5921 }, { "epoch": 0.5763503649635037, "grad_norm": 1.1556745592201176, "learning_rate": 4.013536536717991e-06, "loss": 0.283, "step": 5922 }, { "epoch": 0.5764476885644769, "grad_norm": 1.6250093899969087, "learning_rate": 4.011991410416486e-06, "loss": 0.5778, "step": 5923 }, { "epoch": 0.5765450121654502, "grad_norm": 1.6953126406339947, "learning_rate": 4.010446382294001e-06, "loss": 0.4984, "step": 5924 }, { "epoch": 0.5766423357664233, "grad_norm": 1.4142486281163247, "learning_rate": 4.008901452504069e-06, "loss": 0.2631, "step": 5925 }, { "epoch": 0.5767396593673966, "grad_norm": 1.402890112232536, "learning_rate": 4.007356621200208e-06, "loss": 0.3787, "step": 5926 }, { "epoch": 0.5768369829683698, "grad_norm": 1.3405076764750274, "learning_rate": 4.005811888535929e-06, "loss": 0.3728, "step": 5927 }, { "epoch": 0.5769343065693431, "grad_norm": 1.2939337503168566, "learning_rate": 4.0042672546647345e-06, "loss": 0.3303, "step": 5928 }, { "epoch": 0.5770316301703163, "grad_norm": 1.4749454746836559, "learning_rate": 4.002722719740115e-06, "loss": 0.5413, "step": 5929 }, { "epoch": 0.5771289537712896, "grad_norm": 1.4847428518554533, "learning_rate": 4.001178283915552e-06, "loss": 0.3488, "step": 5930 }, { "epoch": 0.5772262773722627, "grad_norm": 1.3332206360555243, "learning_rate": 3.999633947344516e-06, "loss": 0.3981, "step": 5931 }, { "epoch": 0.577323600973236, "grad_norm": 1.4944277418937246, "learning_rate": 3.99808971018047e-06, "loss": 0.408, "step": 5932 }, { "epoch": 0.5774209245742092, "grad_norm": 1.737536187103843, "learning_rate": 3.996545572576866e-06, "loss": 0.5298, "step": 5933 }, { "epoch": 0.5775182481751825, "grad_norm": 1.2474936630760807, "learning_rate": 3.995001534687145e-06, "loss": 0.3373, "step": 5934 }, { "epoch": 0.5776155717761557, "grad_norm": 1.4019318297079624, "learning_rate": 3.993457596664738e-06, "loss": 0.4404, "step": 5935 }, { "epoch": 0.577712895377129, "grad_norm": 1.5277272399095878, "learning_rate": 3.99191375866307e-06, "loss": 0.3877, "step": 5936 }, { "epoch": 0.5778102189781021, "grad_norm": 1.3728494732881964, "learning_rate": 3.99037002083555e-06, "loss": 0.4021, "step": 5937 }, { "epoch": 0.5779075425790754, "grad_norm": 1.4679863750542683, "learning_rate": 3.988826383335582e-06, "loss": 0.2306, "step": 5938 }, { "epoch": 0.5780048661800486, "grad_norm": 1.4183002010977648, "learning_rate": 3.987282846316557e-06, "loss": 0.3651, "step": 5939 }, { "epoch": 0.5781021897810219, "grad_norm": 1.368718973443921, "learning_rate": 3.98573940993186e-06, "loss": 0.2663, "step": 5940 }, { "epoch": 0.5781995133819952, "grad_norm": 1.424469979748807, "learning_rate": 3.98419607433486e-06, "loss": 0.4032, "step": 5941 }, { "epoch": 0.5782968369829684, "grad_norm": 1.5021555829848496, "learning_rate": 3.98265283967892e-06, "loss": 0.3981, "step": 5942 }, { "epoch": 0.5783941605839416, "grad_norm": 1.1988530300284257, "learning_rate": 3.9811097061173955e-06, "loss": 0.274, "step": 5943 }, { "epoch": 0.5784914841849148, "grad_norm": 1.4405791434646016, "learning_rate": 3.979566673803623e-06, "loss": 0.3331, "step": 5944 }, { "epoch": 0.5785888077858881, "grad_norm": 1.534921565086639, "learning_rate": 3.978023742890937e-06, "loss": 0.4452, "step": 5945 }, { "epoch": 0.5786861313868613, "grad_norm": 1.4450376507034977, "learning_rate": 3.9764809135326606e-06, "loss": 0.4099, "step": 5946 }, { "epoch": 0.5787834549878346, "grad_norm": 1.5192232187486505, "learning_rate": 3.974938185882106e-06, "loss": 0.3869, "step": 5947 }, { "epoch": 0.5788807785888078, "grad_norm": 1.962930100077256, "learning_rate": 3.973395560092572e-06, "loss": 0.3893, "step": 5948 }, { "epoch": 0.5789781021897811, "grad_norm": 1.5090216497507596, "learning_rate": 3.971853036317353e-06, "loss": 0.2462, "step": 5949 }, { "epoch": 0.5790754257907542, "grad_norm": 1.4732758612376924, "learning_rate": 3.970310614709729e-06, "loss": 0.4167, "step": 5950 }, { "epoch": 0.5791727493917275, "grad_norm": 1.5161312083783107, "learning_rate": 3.968768295422974e-06, "loss": 0.4823, "step": 5951 }, { "epoch": 0.5792700729927007, "grad_norm": 1.271524314428882, "learning_rate": 3.967226078610346e-06, "loss": 0.2618, "step": 5952 }, { "epoch": 0.579367396593674, "grad_norm": 1.6686446453083665, "learning_rate": 3.965683964425098e-06, "loss": 0.5373, "step": 5953 }, { "epoch": 0.5794647201946472, "grad_norm": 1.5677241158773498, "learning_rate": 3.964141953020472e-06, "loss": 0.4947, "step": 5954 }, { "epoch": 0.5795620437956205, "grad_norm": 1.6303707281605235, "learning_rate": 3.962600044549694e-06, "loss": 0.3536, "step": 5955 }, { "epoch": 0.5796593673965936, "grad_norm": 1.390322405864935, "learning_rate": 3.961058239165987e-06, "loss": 0.2983, "step": 5956 }, { "epoch": 0.5797566909975669, "grad_norm": 1.2547340868974297, "learning_rate": 3.959516537022561e-06, "loss": 0.4017, "step": 5957 }, { "epoch": 0.5798540145985401, "grad_norm": 1.6559553064129353, "learning_rate": 3.95797493827262e-06, "loss": 0.3667, "step": 5958 }, { "epoch": 0.5799513381995134, "grad_norm": 1.400516009603213, "learning_rate": 3.956433443069346e-06, "loss": 0.2541, "step": 5959 }, { "epoch": 0.5800486618004866, "grad_norm": 1.4053238255928961, "learning_rate": 3.954892051565923e-06, "loss": 0.4111, "step": 5960 }, { "epoch": 0.5801459854014599, "grad_norm": 1.5918312490297373, "learning_rate": 3.953350763915521e-06, "loss": 0.4221, "step": 5961 }, { "epoch": 0.580243309002433, "grad_norm": 1.68289191183515, "learning_rate": 3.951809580271295e-06, "loss": 0.4138, "step": 5962 }, { "epoch": 0.5803406326034063, "grad_norm": 1.72947481772404, "learning_rate": 3.950268500786396e-06, "loss": 0.5751, "step": 5963 }, { "epoch": 0.5804379562043795, "grad_norm": 1.3780465620559466, "learning_rate": 3.948727525613961e-06, "loss": 0.37, "step": 5964 }, { "epoch": 0.5805352798053528, "grad_norm": 1.4712553368068928, "learning_rate": 3.94718665490712e-06, "loss": 0.2758, "step": 5965 }, { "epoch": 0.580632603406326, "grad_norm": 1.5602113365415762, "learning_rate": 3.9456458888189856e-06, "loss": 0.3545, "step": 5966 }, { "epoch": 0.5807299270072993, "grad_norm": 1.508331841877293, "learning_rate": 3.944105227502667e-06, "loss": 0.3542, "step": 5967 }, { "epoch": 0.5808272506082726, "grad_norm": 1.584067157882251, "learning_rate": 3.942564671111262e-06, "loss": 0.6656, "step": 5968 }, { "epoch": 0.5809245742092457, "grad_norm": 1.4773530004606112, "learning_rate": 3.9410242197978575e-06, "loss": 0.2512, "step": 5969 }, { "epoch": 0.581021897810219, "grad_norm": 1.4681060678066031, "learning_rate": 3.939483873715525e-06, "loss": 0.4127, "step": 5970 }, { "epoch": 0.5811192214111922, "grad_norm": 1.5003292993845707, "learning_rate": 3.937943633017331e-06, "loss": 0.4188, "step": 5971 }, { "epoch": 0.5812165450121655, "grad_norm": 1.4828753475139602, "learning_rate": 3.936403497856333e-06, "loss": 0.3884, "step": 5972 }, { "epoch": 0.5813138686131387, "grad_norm": 1.4017949212477416, "learning_rate": 3.934863468385572e-06, "loss": 0.18, "step": 5973 }, { "epoch": 0.581411192214112, "grad_norm": 1.5505106423217885, "learning_rate": 3.933323544758083e-06, "loss": 0.348, "step": 5974 }, { "epoch": 0.5815085158150851, "grad_norm": 1.7306909698333623, "learning_rate": 3.931783727126888e-06, "loss": 0.4749, "step": 5975 }, { "epoch": 0.5816058394160584, "grad_norm": 1.3779307256596018, "learning_rate": 3.930244015645004e-06, "loss": 0.383, "step": 5976 }, { "epoch": 0.5817031630170316, "grad_norm": 1.3276773932659995, "learning_rate": 3.928704410465426e-06, "loss": 0.4589, "step": 5977 }, { "epoch": 0.5818004866180049, "grad_norm": 1.5639466264695725, "learning_rate": 3.92716491174115e-06, "loss": 0.3932, "step": 5978 }, { "epoch": 0.5818978102189781, "grad_norm": 1.552109156997534, "learning_rate": 3.925625519625159e-06, "loss": 0.4782, "step": 5979 }, { "epoch": 0.5819951338199514, "grad_norm": 1.3282942720139936, "learning_rate": 3.924086234270417e-06, "loss": 0.469, "step": 5980 }, { "epoch": 0.5820924574209245, "grad_norm": 1.24846177823519, "learning_rate": 3.922547055829888e-06, "loss": 0.2714, "step": 5981 }, { "epoch": 0.5821897810218978, "grad_norm": 1.7363894336977386, "learning_rate": 3.921007984456521e-06, "loss": 0.6387, "step": 5982 }, { "epoch": 0.582287104622871, "grad_norm": 1.4351834414527354, "learning_rate": 3.919469020303254e-06, "loss": 0.5076, "step": 5983 }, { "epoch": 0.5823844282238443, "grad_norm": 1.2278370643463414, "learning_rate": 3.917930163523014e-06, "loss": 0.2925, "step": 5984 }, { "epoch": 0.5824817518248175, "grad_norm": 1.596615328643663, "learning_rate": 3.9163914142687185e-06, "loss": 0.4553, "step": 5985 }, { "epoch": 0.5825790754257908, "grad_norm": 1.8893893909568775, "learning_rate": 3.914852772693274e-06, "loss": 0.584, "step": 5986 }, { "epoch": 0.5826763990267639, "grad_norm": 1.5446687628466054, "learning_rate": 3.913314238949579e-06, "loss": 0.3296, "step": 5987 }, { "epoch": 0.5827737226277372, "grad_norm": 1.4060653565303132, "learning_rate": 3.911775813190512e-06, "loss": 0.5253, "step": 5988 }, { "epoch": 0.5828710462287104, "grad_norm": 1.8736522281069823, "learning_rate": 3.910237495568953e-06, "loss": 0.3444, "step": 5989 }, { "epoch": 0.5829683698296837, "grad_norm": 1.6316058797426887, "learning_rate": 3.908699286237766e-06, "loss": 0.4836, "step": 5990 }, { "epoch": 0.583065693430657, "grad_norm": 1.6721928374594346, "learning_rate": 3.9071611853498e-06, "loss": 0.5202, "step": 5991 }, { "epoch": 0.5831630170316302, "grad_norm": 1.5119042886433613, "learning_rate": 3.905623193057898e-06, "loss": 0.5214, "step": 5992 }, { "epoch": 0.5832603406326035, "grad_norm": 1.5551785407648202, "learning_rate": 3.904085309514892e-06, "loss": 0.3067, "step": 5993 }, { "epoch": 0.5833576642335766, "grad_norm": 3.193003671416416, "learning_rate": 3.9025475348736045e-06, "loss": 0.367, "step": 5994 }, { "epoch": 0.5834549878345499, "grad_norm": 1.5549766999979842, "learning_rate": 3.901009869286841e-06, "loss": 0.439, "step": 5995 }, { "epoch": 0.5835523114355231, "grad_norm": 1.690967247355317, "learning_rate": 3.899472312907402e-06, "loss": 0.4892, "step": 5996 }, { "epoch": 0.5836496350364964, "grad_norm": 1.4513771456172895, "learning_rate": 3.897934865888079e-06, "loss": 0.4441, "step": 5997 }, { "epoch": 0.5837469586374696, "grad_norm": 1.373683125444904, "learning_rate": 3.896397528381642e-06, "loss": 0.4698, "step": 5998 }, { "epoch": 0.5838442822384429, "grad_norm": 2.093406649093295, "learning_rate": 3.894860300540861e-06, "loss": 0.6103, "step": 5999 }, { "epoch": 0.583941605839416, "grad_norm": 1.5935136208360565, "learning_rate": 3.893323182518492e-06, "loss": 0.4388, "step": 6000 }, { "epoch": 0.5840389294403893, "grad_norm": 1.3272675382885624, "learning_rate": 3.891786174467281e-06, "loss": 0.352, "step": 6001 }, { "epoch": 0.5841362530413625, "grad_norm": 1.159572570089231, "learning_rate": 3.8902492765399565e-06, "loss": 0.261, "step": 6002 }, { "epoch": 0.5842335766423358, "grad_norm": 1.6999281531466546, "learning_rate": 3.888712488889243e-06, "loss": 0.4206, "step": 6003 }, { "epoch": 0.584330900243309, "grad_norm": 2.632293650046838, "learning_rate": 3.8871758116678536e-06, "loss": 0.3876, "step": 6004 }, { "epoch": 0.5844282238442823, "grad_norm": 1.2638049755722311, "learning_rate": 3.885639245028489e-06, "loss": 0.3541, "step": 6005 }, { "epoch": 0.5845255474452554, "grad_norm": 1.5066151979539468, "learning_rate": 3.884102789123835e-06, "loss": 0.4295, "step": 6006 }, { "epoch": 0.5846228710462287, "grad_norm": 1.5095710579673731, "learning_rate": 3.882566444106573e-06, "loss": 0.392, "step": 6007 }, { "epoch": 0.5847201946472019, "grad_norm": 1.7154230690330807, "learning_rate": 3.881030210129373e-06, "loss": 0.5574, "step": 6008 }, { "epoch": 0.5848175182481752, "grad_norm": 1.436552647774059, "learning_rate": 3.8794940873448865e-06, "loss": 0.4207, "step": 6009 }, { "epoch": 0.5849148418491484, "grad_norm": 1.5310450046883748, "learning_rate": 3.877958075905761e-06, "loss": 0.4533, "step": 6010 }, { "epoch": 0.5850121654501217, "grad_norm": 1.4218251984646402, "learning_rate": 3.876422175964632e-06, "loss": 0.4838, "step": 6011 }, { "epoch": 0.5851094890510949, "grad_norm": 1.6683972275141359, "learning_rate": 3.874886387674124e-06, "loss": 0.4184, "step": 6012 }, { "epoch": 0.5852068126520681, "grad_norm": 1.5049663027922862, "learning_rate": 3.873350711186845e-06, "loss": 0.4361, "step": 6013 }, { "epoch": 0.5853041362530413, "grad_norm": 1.4013598717687963, "learning_rate": 3.871815146655398e-06, "loss": 0.3404, "step": 6014 }, { "epoch": 0.5854014598540146, "grad_norm": 1.6941280886111705, "learning_rate": 3.870279694232374e-06, "loss": 0.4737, "step": 6015 }, { "epoch": 0.5854987834549878, "grad_norm": 1.6982774253729849, "learning_rate": 3.868744354070351e-06, "loss": 0.5216, "step": 6016 }, { "epoch": 0.5855961070559611, "grad_norm": 1.3167767102494852, "learning_rate": 3.8672091263218965e-06, "loss": 0.3244, "step": 6017 }, { "epoch": 0.5856934306569344, "grad_norm": 1.0807635320998275, "learning_rate": 3.865674011139567e-06, "loss": 0.2452, "step": 6018 }, { "epoch": 0.5857907542579075, "grad_norm": 1.467439269542744, "learning_rate": 3.8641390086759095e-06, "loss": 0.2768, "step": 6019 }, { "epoch": 0.5858880778588808, "grad_norm": 1.5529447209135872, "learning_rate": 3.862604119083456e-06, "loss": 0.3467, "step": 6020 }, { "epoch": 0.585985401459854, "grad_norm": 1.3118746493894737, "learning_rate": 3.86106934251473e-06, "loss": 0.3916, "step": 6021 }, { "epoch": 0.5860827250608273, "grad_norm": 1.594330476253586, "learning_rate": 3.859534679122244e-06, "loss": 0.4206, "step": 6022 }, { "epoch": 0.5861800486618005, "grad_norm": 1.6629647946097874, "learning_rate": 3.8580001290585004e-06, "loss": 0.5061, "step": 6023 }, { "epoch": 0.5862773722627738, "grad_norm": 1.5447139865364772, "learning_rate": 3.8564656924759824e-06, "loss": 0.3372, "step": 6024 }, { "epoch": 0.5863746958637469, "grad_norm": 1.4120860750800777, "learning_rate": 3.854931369527172e-06, "loss": 0.3227, "step": 6025 }, { "epoch": 0.5864720194647202, "grad_norm": 1.4444649350514327, "learning_rate": 3.853397160364537e-06, "loss": 0.3111, "step": 6026 }, { "epoch": 0.5865693430656934, "grad_norm": 2.2801677279097525, "learning_rate": 3.851863065140528e-06, "loss": 0.354, "step": 6027 }, { "epoch": 0.5866666666666667, "grad_norm": 1.1520906849374228, "learning_rate": 3.850329084007594e-06, "loss": 0.2854, "step": 6028 }, { "epoch": 0.5867639902676399, "grad_norm": 1.6821634186635506, "learning_rate": 3.8487952171181656e-06, "loss": 0.5804, "step": 6029 }, { "epoch": 0.5868613138686132, "grad_norm": 1.7129198548140288, "learning_rate": 3.8472614646246635e-06, "loss": 0.3781, "step": 6030 }, { "epoch": 0.5869586374695863, "grad_norm": 1.3054619049860754, "learning_rate": 3.8457278266794985e-06, "loss": 0.3634, "step": 6031 }, { "epoch": 0.5870559610705596, "grad_norm": 1.5114654407645212, "learning_rate": 3.844194303435068e-06, "loss": 0.4674, "step": 6032 }, { "epoch": 0.5871532846715328, "grad_norm": 1.463934043696164, "learning_rate": 3.842660895043763e-06, "loss": 0.3581, "step": 6033 }, { "epoch": 0.5872506082725061, "grad_norm": 1.5736029771843554, "learning_rate": 3.841127601657952e-06, "loss": 0.489, "step": 6034 }, { "epoch": 0.5873479318734793, "grad_norm": 1.5881308436070423, "learning_rate": 3.839594423430006e-06, "loss": 0.4743, "step": 6035 }, { "epoch": 0.5874452554744526, "grad_norm": 1.2930329643968506, "learning_rate": 3.838061360512273e-06, "loss": 0.3213, "step": 6036 }, { "epoch": 0.5875425790754258, "grad_norm": 1.5166564263357916, "learning_rate": 3.8365284130571e-06, "loss": 0.3018, "step": 6037 }, { "epoch": 0.587639902676399, "grad_norm": 1.6269815540921038, "learning_rate": 3.834995581216812e-06, "loss": 0.437, "step": 6038 }, { "epoch": 0.5877372262773722, "grad_norm": 1.526687051617541, "learning_rate": 3.833462865143729e-06, "loss": 0.4139, "step": 6039 }, { "epoch": 0.5878345498783455, "grad_norm": 1.6329604086436111, "learning_rate": 3.831930264990159e-06, "loss": 0.4825, "step": 6040 }, { "epoch": 0.5879318734793187, "grad_norm": 1.4586937958416275, "learning_rate": 3.830397780908396e-06, "loss": 0.4184, "step": 6041 }, { "epoch": 0.588029197080292, "grad_norm": 1.0949958789869823, "learning_rate": 3.828865413050724e-06, "loss": 0.2606, "step": 6042 }, { "epoch": 0.5881265206812653, "grad_norm": 1.706951916420232, "learning_rate": 3.827333161569416e-06, "loss": 0.5538, "step": 6043 }, { "epoch": 0.5882238442822384, "grad_norm": 1.2414312399767005, "learning_rate": 3.825801026616735e-06, "loss": 0.276, "step": 6044 }, { "epoch": 0.5883211678832116, "grad_norm": 1.4168850038610261, "learning_rate": 3.824269008344925e-06, "loss": 0.3251, "step": 6045 }, { "epoch": 0.5884184914841849, "grad_norm": 1.6149522749518992, "learning_rate": 3.822737106906226e-06, "loss": 0.4471, "step": 6046 }, { "epoch": 0.5885158150851582, "grad_norm": 1.6257326968494004, "learning_rate": 3.821205322452863e-06, "loss": 0.5379, "step": 6047 }, { "epoch": 0.5886131386861314, "grad_norm": 1.6789960990710493, "learning_rate": 3.819673655137056e-06, "loss": 0.5337, "step": 6048 }, { "epoch": 0.5887104622871047, "grad_norm": 1.5794864485093818, "learning_rate": 3.818142105111e-06, "loss": 0.4465, "step": 6049 }, { "epoch": 0.5888077858880778, "grad_norm": 1.3444717597988916, "learning_rate": 3.816610672526891e-06, "loss": 0.4042, "step": 6050 }, { "epoch": 0.5889051094890511, "grad_norm": 1.554026093999093, "learning_rate": 3.815079357536907e-06, "loss": 0.3479, "step": 6051 }, { "epoch": 0.5890024330900243, "grad_norm": 1.5256975673496376, "learning_rate": 3.813548160293214e-06, "loss": 0.363, "step": 6052 }, { "epoch": 0.5890997566909976, "grad_norm": 1.5990062130729201, "learning_rate": 3.8120170809479703e-06, "loss": 0.517, "step": 6053 }, { "epoch": 0.5891970802919708, "grad_norm": 1.3862958236289114, "learning_rate": 3.810486119653319e-06, "loss": 0.3521, "step": 6054 }, { "epoch": 0.5892944038929441, "grad_norm": 1.5595567066074474, "learning_rate": 3.808955276561396e-06, "loss": 0.4872, "step": 6055 }, { "epoch": 0.5893917274939173, "grad_norm": 1.4942331721746605, "learning_rate": 3.807424551824316e-06, "loss": 0.4232, "step": 6056 }, { "epoch": 0.5894890510948905, "grad_norm": 1.6271760016251753, "learning_rate": 3.805893945594191e-06, "loss": 0.1589, "step": 6057 }, { "epoch": 0.5895863746958637, "grad_norm": 1.259542850639245, "learning_rate": 3.804363458023119e-06, "loss": 0.3037, "step": 6058 }, { "epoch": 0.589683698296837, "grad_norm": 2.0986186116245333, "learning_rate": 3.8028330892631883e-06, "loss": 0.4256, "step": 6059 }, { "epoch": 0.5897810218978102, "grad_norm": 1.1096117008912445, "learning_rate": 3.8013028394664663e-06, "loss": 0.221, "step": 6060 }, { "epoch": 0.5898783454987835, "grad_norm": 1.5376999143562857, "learning_rate": 3.7997727087850184e-06, "loss": 0.4521, "step": 6061 }, { "epoch": 0.5899756690997567, "grad_norm": 1.4971816288421493, "learning_rate": 3.7982426973708947e-06, "loss": 0.3751, "step": 6062 }, { "epoch": 0.5900729927007299, "grad_norm": 1.4982052238160424, "learning_rate": 3.796712805376132e-06, "loss": 0.3096, "step": 6063 }, { "epoch": 0.5901703163017031, "grad_norm": 1.7366305968528648, "learning_rate": 3.795183032952758e-06, "loss": 0.3385, "step": 6064 }, { "epoch": 0.5902676399026764, "grad_norm": 1.4992425118571515, "learning_rate": 3.793653380252786e-06, "loss": 0.3124, "step": 6065 }, { "epoch": 0.5903649635036496, "grad_norm": 1.7940707674219727, "learning_rate": 3.7921238474282208e-06, "loss": 0.5323, "step": 6066 }, { "epoch": 0.5904622871046229, "grad_norm": 1.3390490786081235, "learning_rate": 3.7905944346310485e-06, "loss": 0.3371, "step": 6067 }, { "epoch": 0.5905596107055961, "grad_norm": 1.3833066843905326, "learning_rate": 3.7890651420132517e-06, "loss": 0.3587, "step": 6068 }, { "epoch": 0.5906569343065693, "grad_norm": 1.3927231294487479, "learning_rate": 3.7875359697267967e-06, "loss": 0.3751, "step": 6069 }, { "epoch": 0.5907542579075425, "grad_norm": 1.7130057319059748, "learning_rate": 3.7860069179236393e-06, "loss": 0.5281, "step": 6070 }, { "epoch": 0.5908515815085158, "grad_norm": 1.3074475179026124, "learning_rate": 3.784477986755718e-06, "loss": 0.2435, "step": 6071 }, { "epoch": 0.590948905109489, "grad_norm": 1.5623722024152904, "learning_rate": 3.7829491763749666e-06, "loss": 0.4698, "step": 6072 }, { "epoch": 0.5910462287104623, "grad_norm": 1.3230466803794698, "learning_rate": 3.781420486933305e-06, "loss": 0.3305, "step": 6073 }, { "epoch": 0.5911435523114356, "grad_norm": 1.5272031753513022, "learning_rate": 3.7798919185826364e-06, "loss": 0.503, "step": 6074 }, { "epoch": 0.5912408759124088, "grad_norm": 1.5784992491871666, "learning_rate": 3.7783634714748592e-06, "loss": 0.5169, "step": 6075 }, { "epoch": 0.591338199513382, "grad_norm": 1.5153655783840159, "learning_rate": 3.776835145761854e-06, "loss": 0.3641, "step": 6076 }, { "epoch": 0.5914355231143552, "grad_norm": 1.7743615923163676, "learning_rate": 3.7753069415954936e-06, "loss": 0.3779, "step": 6077 }, { "epoch": 0.5915328467153285, "grad_norm": 1.2694613862988158, "learning_rate": 3.7737788591276337e-06, "loss": 0.2303, "step": 6078 }, { "epoch": 0.5916301703163017, "grad_norm": 1.663601878714137, "learning_rate": 3.7722508985101225e-06, "loss": 0.3174, "step": 6079 }, { "epoch": 0.591727493917275, "grad_norm": 1.7689112235953703, "learning_rate": 3.7707230598947964e-06, "loss": 0.5926, "step": 6080 }, { "epoch": 0.5918248175182482, "grad_norm": 1.4135407386201395, "learning_rate": 3.769195343433473e-06, "loss": 0.4129, "step": 6081 }, { "epoch": 0.5919221411192214, "grad_norm": 1.5315687665205964, "learning_rate": 3.767667749277965e-06, "loss": 0.4421, "step": 6082 }, { "epoch": 0.5920194647201946, "grad_norm": 1.5304124838362247, "learning_rate": 3.7661402775800703e-06, "loss": 0.4755, "step": 6083 }, { "epoch": 0.5921167883211679, "grad_norm": 1.5824857082595416, "learning_rate": 3.7646129284915754e-06, "loss": 0.4987, "step": 6084 }, { "epoch": 0.5922141119221411, "grad_norm": 1.4836716441134765, "learning_rate": 3.763085702164252e-06, "loss": 0.3379, "step": 6085 }, { "epoch": 0.5923114355231144, "grad_norm": 1.480465529143924, "learning_rate": 3.7615585987498627e-06, "loss": 0.395, "step": 6086 }, { "epoch": 0.5924087591240876, "grad_norm": 1.3348532398149553, "learning_rate": 3.760031618400157e-06, "loss": 0.1832, "step": 6087 }, { "epoch": 0.5925060827250608, "grad_norm": 1.23992401805474, "learning_rate": 3.7585047612668725e-06, "loss": 0.2975, "step": 6088 }, { "epoch": 0.592603406326034, "grad_norm": 1.438004612465625, "learning_rate": 3.7569780275017313e-06, "loss": 0.4668, "step": 6089 }, { "epoch": 0.5927007299270073, "grad_norm": 1.2096369496620847, "learning_rate": 3.7554514172564483e-06, "loss": 0.2963, "step": 6090 }, { "epoch": 0.5927980535279805, "grad_norm": 1.505986189790302, "learning_rate": 3.753924930682725e-06, "loss": 0.3663, "step": 6091 }, { "epoch": 0.5928953771289538, "grad_norm": 1.5892521837839675, "learning_rate": 3.752398567932245e-06, "loss": 0.4481, "step": 6092 }, { "epoch": 0.592992700729927, "grad_norm": 1.15344950754204, "learning_rate": 3.7508723291566857e-06, "loss": 0.2643, "step": 6093 }, { "epoch": 0.5930900243309002, "grad_norm": 1.5285078134783145, "learning_rate": 3.7493462145077107e-06, "loss": 0.5585, "step": 6094 }, { "epoch": 0.5931873479318734, "grad_norm": 1.1830124639401085, "learning_rate": 3.7478202241369733e-06, "loss": 0.2962, "step": 6095 }, { "epoch": 0.5932846715328467, "grad_norm": 1.7168858909747537, "learning_rate": 3.7462943581961077e-06, "loss": 0.5145, "step": 6096 }, { "epoch": 0.59338199513382, "grad_norm": 1.662499495197879, "learning_rate": 3.7447686168367426e-06, "loss": 0.5683, "step": 6097 }, { "epoch": 0.5934793187347932, "grad_norm": 1.1240873343487263, "learning_rate": 3.743243000210493e-06, "loss": 0.2461, "step": 6098 }, { "epoch": 0.5935766423357665, "grad_norm": 1.2828864834786387, "learning_rate": 3.7417175084689573e-06, "loss": 0.31, "step": 6099 }, { "epoch": 0.5936739659367397, "grad_norm": 1.48494597044505, "learning_rate": 3.7401921417637264e-06, "loss": 0.3407, "step": 6100 }, { "epoch": 0.5937712895377129, "grad_norm": 1.658622194496362, "learning_rate": 3.738666900246377e-06, "loss": 0.4679, "step": 6101 }, { "epoch": 0.5938686131386861, "grad_norm": 1.2506596255334679, "learning_rate": 3.7371417840684756e-06, "loss": 0.3717, "step": 6102 }, { "epoch": 0.5939659367396594, "grad_norm": 1.5335644727449282, "learning_rate": 3.7356167933815677e-06, "loss": 0.4834, "step": 6103 }, { "epoch": 0.5940632603406326, "grad_norm": 1.414037124659017, "learning_rate": 3.7340919283371974e-06, "loss": 0.3945, "step": 6104 }, { "epoch": 0.5941605839416059, "grad_norm": 1.7992199399598825, "learning_rate": 3.73256718908689e-06, "loss": 0.3953, "step": 6105 }, { "epoch": 0.5942579075425791, "grad_norm": 1.3429696234522137, "learning_rate": 3.731042575782161e-06, "loss": 0.3966, "step": 6106 }, { "epoch": 0.5943552311435523, "grad_norm": 1.7734539014419153, "learning_rate": 3.72951808857451e-06, "loss": 0.2219, "step": 6107 }, { "epoch": 0.5944525547445255, "grad_norm": 1.862840260396579, "learning_rate": 3.727993727615428e-06, "loss": 0.2874, "step": 6108 }, { "epoch": 0.5945498783454988, "grad_norm": 1.7581295829466408, "learning_rate": 3.7264694930563916e-06, "loss": 0.2755, "step": 6109 }, { "epoch": 0.594647201946472, "grad_norm": 1.3464145855901808, "learning_rate": 3.7249453850488635e-06, "loss": 0.3241, "step": 6110 }, { "epoch": 0.5947445255474453, "grad_norm": 1.5880395648168935, "learning_rate": 3.7234214037442964e-06, "loss": 0.5298, "step": 6111 }, { "epoch": 0.5948418491484185, "grad_norm": 1.5626657016629657, "learning_rate": 3.721897549294129e-06, "loss": 0.3321, "step": 6112 }, { "epoch": 0.5949391727493917, "grad_norm": 1.2336231307915122, "learning_rate": 3.72037382184979e-06, "loss": 0.3928, "step": 6113 }, { "epoch": 0.5950364963503649, "grad_norm": 1.0442703640271636, "learning_rate": 3.7188502215626876e-06, "loss": 0.1625, "step": 6114 }, { "epoch": 0.5951338199513382, "grad_norm": 1.2218465348071996, "learning_rate": 3.7173267485842274e-06, "loss": 0.3233, "step": 6115 }, { "epoch": 0.5952311435523114, "grad_norm": 1.6977370543544572, "learning_rate": 3.7158034030657973e-06, "loss": 0.3452, "step": 6116 }, { "epoch": 0.5953284671532847, "grad_norm": 1.3864970285168516, "learning_rate": 3.714280185158771e-06, "loss": 0.4554, "step": 6117 }, { "epoch": 0.5954257907542579, "grad_norm": 1.502417682290556, "learning_rate": 3.7127570950145132e-06, "loss": 0.4773, "step": 6118 }, { "epoch": 0.5955231143552312, "grad_norm": 1.370124844062712, "learning_rate": 3.7112341327843744e-06, "loss": 0.3619, "step": 6119 }, { "epoch": 0.5956204379562043, "grad_norm": 1.2822973576080363, "learning_rate": 3.7097112986196926e-06, "loss": 0.3291, "step": 6120 }, { "epoch": 0.5957177615571776, "grad_norm": 1.4169549181274235, "learning_rate": 3.7081885926717908e-06, "loss": 0.365, "step": 6121 }, { "epoch": 0.5958150851581508, "grad_norm": 1.5093831790678773, "learning_rate": 3.706666015091983e-06, "loss": 0.4236, "step": 6122 }, { "epoch": 0.5959124087591241, "grad_norm": 1.5976497584726386, "learning_rate": 3.7051435660315682e-06, "loss": 0.4744, "step": 6123 }, { "epoch": 0.5960097323600974, "grad_norm": 1.1751598350897283, "learning_rate": 3.7036212456418353e-06, "loss": 0.2396, "step": 6124 }, { "epoch": 0.5961070559610706, "grad_norm": 1.4597928872241048, "learning_rate": 3.7020990540740542e-06, "loss": 0.3955, "step": 6125 }, { "epoch": 0.5962043795620438, "grad_norm": 1.5809562138786184, "learning_rate": 3.7005769914794866e-06, "loss": 0.2964, "step": 6126 }, { "epoch": 0.596301703163017, "grad_norm": 2.128641822561641, "learning_rate": 3.699055058009385e-06, "loss": 0.3346, "step": 6127 }, { "epoch": 0.5963990267639903, "grad_norm": 1.8437655173473075, "learning_rate": 3.69753325381498e-06, "loss": 0.3568, "step": 6128 }, { "epoch": 0.5964963503649635, "grad_norm": 1.56867706007675, "learning_rate": 3.696011579047496e-06, "loss": 0.3537, "step": 6129 }, { "epoch": 0.5965936739659368, "grad_norm": 1.3632014122787737, "learning_rate": 3.6944900338581423e-06, "loss": 0.4356, "step": 6130 }, { "epoch": 0.59669099756691, "grad_norm": 1.4999014504166313, "learning_rate": 3.6929686183981185e-06, "loss": 0.3298, "step": 6131 }, { "epoch": 0.5967883211678832, "grad_norm": 1.3682686155179566, "learning_rate": 3.6914473328186045e-06, "loss": 0.5047, "step": 6132 }, { "epoch": 0.5968856447688564, "grad_norm": 1.365093218446463, "learning_rate": 3.689926177270774e-06, "loss": 0.2212, "step": 6133 }, { "epoch": 0.5969829683698297, "grad_norm": 1.3199006290312039, "learning_rate": 3.688405151905786e-06, "loss": 0.3044, "step": 6134 }, { "epoch": 0.5970802919708029, "grad_norm": 1.2308004749555046, "learning_rate": 3.6868842568747833e-06, "loss": 0.3186, "step": 6135 }, { "epoch": 0.5971776155717762, "grad_norm": 1.513789294210874, "learning_rate": 3.6853634923288966e-06, "loss": 0.3573, "step": 6136 }, { "epoch": 0.5972749391727494, "grad_norm": 1.347018237363608, "learning_rate": 3.683842858419249e-06, "loss": 0.2027, "step": 6137 }, { "epoch": 0.5973722627737226, "grad_norm": 1.5573097236462499, "learning_rate": 3.6823223552969483e-06, "loss": 0.3346, "step": 6138 }, { "epoch": 0.5974695863746958, "grad_norm": 1.2942317084290391, "learning_rate": 3.6808019831130824e-06, "loss": 0.2606, "step": 6139 }, { "epoch": 0.5975669099756691, "grad_norm": 1.4921158378928883, "learning_rate": 3.679281742018735e-06, "loss": 0.2087, "step": 6140 }, { "epoch": 0.5976642335766423, "grad_norm": 1.4239374649911147, "learning_rate": 3.6777616321649723e-06, "loss": 0.4631, "step": 6141 }, { "epoch": 0.5977615571776156, "grad_norm": 1.6193438173543353, "learning_rate": 3.67624165370285e-06, "loss": 0.4785, "step": 6142 }, { "epoch": 0.5978588807785888, "grad_norm": 1.6575105296817452, "learning_rate": 3.6747218067834066e-06, "loss": 0.3292, "step": 6143 }, { "epoch": 0.5979562043795621, "grad_norm": 2.141420314092702, "learning_rate": 3.673202091557673e-06, "loss": 0.4711, "step": 6144 }, { "epoch": 0.5980535279805352, "grad_norm": 1.3956600290387677, "learning_rate": 3.671682508176664e-06, "loss": 0.3087, "step": 6145 }, { "epoch": 0.5981508515815085, "grad_norm": 1.567137816108438, "learning_rate": 3.670163056791378e-06, "loss": 0.3109, "step": 6146 }, { "epoch": 0.5982481751824817, "grad_norm": 1.3736168233308903, "learning_rate": 3.6686437375528072e-06, "loss": 0.368, "step": 6147 }, { "epoch": 0.598345498783455, "grad_norm": 1.8295882148419824, "learning_rate": 3.667124550611927e-06, "loss": 0.3267, "step": 6148 }, { "epoch": 0.5984428223844283, "grad_norm": 1.4698423016876485, "learning_rate": 3.665605496119701e-06, "loss": 0.3553, "step": 6149 }, { "epoch": 0.5985401459854015, "grad_norm": 2.5955540461387048, "learning_rate": 3.6640865742270756e-06, "loss": 0.4544, "step": 6150 }, { "epoch": 0.5986374695863746, "grad_norm": 1.4561759340499938, "learning_rate": 3.6625677850849877e-06, "loss": 0.4342, "step": 6151 }, { "epoch": 0.5987347931873479, "grad_norm": 1.4778311794275778, "learning_rate": 3.6610491288443628e-06, "loss": 0.3157, "step": 6152 }, { "epoch": 0.5988321167883212, "grad_norm": 1.3844987318832855, "learning_rate": 3.6595306056561077e-06, "loss": 0.3598, "step": 6153 }, { "epoch": 0.5989294403892944, "grad_norm": 1.5713692520761118, "learning_rate": 3.6580122156711194e-06, "loss": 0.5598, "step": 6154 }, { "epoch": 0.5990267639902677, "grad_norm": 1.6172904014099247, "learning_rate": 3.656493959040283e-06, "loss": 0.3726, "step": 6155 }, { "epoch": 0.5991240875912409, "grad_norm": 1.3148188316082874, "learning_rate": 3.6549758359144693e-06, "loss": 0.4045, "step": 6156 }, { "epoch": 0.5992214111922141, "grad_norm": 1.4638324959927835, "learning_rate": 3.6534578464445314e-06, "loss": 0.3574, "step": 6157 }, { "epoch": 0.5993187347931873, "grad_norm": 1.7060596485644455, "learning_rate": 3.6519399907813148e-06, "loss": 0.5914, "step": 6158 }, { "epoch": 0.5994160583941606, "grad_norm": 1.2134973356911638, "learning_rate": 3.6504222690756502e-06, "loss": 0.296, "step": 6159 }, { "epoch": 0.5995133819951338, "grad_norm": 1.5012102013489095, "learning_rate": 3.648904681478357e-06, "loss": 0.4598, "step": 6160 }, { "epoch": 0.5996107055961071, "grad_norm": 1.668964169363922, "learning_rate": 3.647387228140233e-06, "loss": 0.6214, "step": 6161 }, { "epoch": 0.5997080291970803, "grad_norm": 1.3209128735938092, "learning_rate": 3.6458699092120718e-06, "loss": 0.3505, "step": 6162 }, { "epoch": 0.5998053527980536, "grad_norm": 1.432673353720695, "learning_rate": 3.644352724844651e-06, "loss": 0.3196, "step": 6163 }, { "epoch": 0.5999026763990267, "grad_norm": 1.4673750914877397, "learning_rate": 3.642835675188733e-06, "loss": 0.4652, "step": 6164 }, { "epoch": 0.6, "grad_norm": 1.11179399698129, "learning_rate": 3.6413187603950672e-06, "loss": 0.1711, "step": 6165 }, { "epoch": 0.6000973236009732, "grad_norm": 1.1337512631814095, "learning_rate": 3.6398019806143914e-06, "loss": 0.2413, "step": 6166 }, { "epoch": 0.6001946472019465, "grad_norm": 1.5778537932666719, "learning_rate": 3.6382853359974302e-06, "loss": 0.3535, "step": 6167 }, { "epoch": 0.6002919708029197, "grad_norm": 1.306135276611625, "learning_rate": 3.636768826694891e-06, "loss": 0.3538, "step": 6168 }, { "epoch": 0.600389294403893, "grad_norm": 0.9829387783804582, "learning_rate": 3.635252452857471e-06, "loss": 0.1732, "step": 6169 }, { "epoch": 0.6004866180048661, "grad_norm": 1.4643040184046618, "learning_rate": 3.633736214635856e-06, "loss": 0.3299, "step": 6170 }, { "epoch": 0.6005839416058394, "grad_norm": 1.6056317991452056, "learning_rate": 3.632220112180711e-06, "loss": 0.3551, "step": 6171 }, { "epoch": 0.6006812652068126, "grad_norm": 1.4285296978986193, "learning_rate": 3.6307041456426946e-06, "loss": 0.3768, "step": 6172 }, { "epoch": 0.6007785888077859, "grad_norm": 1.3940771736718718, "learning_rate": 3.6291883151724483e-06, "loss": 0.3949, "step": 6173 }, { "epoch": 0.6008759124087591, "grad_norm": 1.588750139742169, "learning_rate": 3.6276726209206026e-06, "loss": 0.4221, "step": 6174 }, { "epoch": 0.6009732360097324, "grad_norm": 1.8091632119521663, "learning_rate": 3.6261570630377716e-06, "loss": 0.2268, "step": 6175 }, { "epoch": 0.6010705596107055, "grad_norm": 1.574034422123617, "learning_rate": 3.6246416416745577e-06, "loss": 0.482, "step": 6176 }, { "epoch": 0.6011678832116788, "grad_norm": 1.4551104036373603, "learning_rate": 3.623126356981549e-06, "loss": 0.3554, "step": 6177 }, { "epoch": 0.601265206812652, "grad_norm": 1.7832562040720823, "learning_rate": 3.6216112091093215e-06, "loss": 0.4079, "step": 6178 }, { "epoch": 0.6013625304136253, "grad_norm": 1.482659563351729, "learning_rate": 3.620096198208434e-06, "loss": 0.4811, "step": 6179 }, { "epoch": 0.6014598540145986, "grad_norm": 1.4021447340664892, "learning_rate": 3.6185813244294355e-06, "loss": 0.3706, "step": 6180 }, { "epoch": 0.6015571776155718, "grad_norm": 1.4073653566318252, "learning_rate": 3.617066587922863e-06, "loss": 0.4801, "step": 6181 }, { "epoch": 0.601654501216545, "grad_norm": 1.2371213275800568, "learning_rate": 3.6155519888392306e-06, "loss": 0.336, "step": 6182 }, { "epoch": 0.6017518248175182, "grad_norm": 1.5131655529186325, "learning_rate": 3.614037527329048e-06, "loss": 0.3914, "step": 6183 }, { "epoch": 0.6018491484184915, "grad_norm": 1.5176163794999507, "learning_rate": 3.612523203542808e-06, "loss": 0.478, "step": 6184 }, { "epoch": 0.6019464720194647, "grad_norm": 1.3559598884923307, "learning_rate": 3.611009017630992e-06, "loss": 0.4438, "step": 6185 }, { "epoch": 0.602043795620438, "grad_norm": 1.451247941047726, "learning_rate": 3.609494969744062e-06, "loss": 0.4221, "step": 6186 }, { "epoch": 0.6021411192214112, "grad_norm": 1.4754502078128098, "learning_rate": 3.6079810600324718e-06, "loss": 0.2953, "step": 6187 }, { "epoch": 0.6022384428223845, "grad_norm": 3.091204125557194, "learning_rate": 3.606467288646659e-06, "loss": 0.2391, "step": 6188 }, { "epoch": 0.6023357664233576, "grad_norm": 1.2406028380444387, "learning_rate": 3.6049536557370494e-06, "loss": 0.2906, "step": 6189 }, { "epoch": 0.6024330900243309, "grad_norm": 1.2436665300893865, "learning_rate": 3.6034401614540516e-06, "loss": 0.3346, "step": 6190 }, { "epoch": 0.6025304136253041, "grad_norm": 1.5299762176709786, "learning_rate": 3.601926805948063e-06, "loss": 0.5753, "step": 6191 }, { "epoch": 0.6026277372262774, "grad_norm": 1.2260847679908822, "learning_rate": 3.6004135893694698e-06, "loss": 0.2925, "step": 6192 }, { "epoch": 0.6027250608272506, "grad_norm": 1.2302483255258292, "learning_rate": 3.598900511868636e-06, "loss": 0.2663, "step": 6193 }, { "epoch": 0.6028223844282239, "grad_norm": 1.5455862514453014, "learning_rate": 3.5973875735959196e-06, "loss": 0.323, "step": 6194 }, { "epoch": 0.602919708029197, "grad_norm": 1.3196179972942772, "learning_rate": 3.595874774701661e-06, "loss": 0.3211, "step": 6195 }, { "epoch": 0.6030170316301703, "grad_norm": 1.4122169210969915, "learning_rate": 3.5943621153361918e-06, "loss": 0.4268, "step": 6196 }, { "epoch": 0.6031143552311435, "grad_norm": 1.7083449557149384, "learning_rate": 3.592849595649822e-06, "loss": 0.7361, "step": 6197 }, { "epoch": 0.6032116788321168, "grad_norm": 1.3428967450485307, "learning_rate": 3.5913372157928515e-06, "loss": 0.2302, "step": 6198 }, { "epoch": 0.60330900243309, "grad_norm": 1.076313874833649, "learning_rate": 3.58982497591557e-06, "loss": 0.2554, "step": 6199 }, { "epoch": 0.6034063260340633, "grad_norm": 1.650622542298931, "learning_rate": 3.5883128761682454e-06, "loss": 0.5339, "step": 6200 }, { "epoch": 0.6035036496350364, "grad_norm": 1.3880897299705472, "learning_rate": 3.5868009167011388e-06, "loss": 0.3218, "step": 6201 }, { "epoch": 0.6036009732360097, "grad_norm": 1.5121167666846589, "learning_rate": 3.5852890976644935e-06, "loss": 0.4513, "step": 6202 }, { "epoch": 0.603698296836983, "grad_norm": 1.4633278288915132, "learning_rate": 3.583777419208542e-06, "loss": 0.5499, "step": 6203 }, { "epoch": 0.6037956204379562, "grad_norm": 1.4117203172549408, "learning_rate": 3.5822658814834964e-06, "loss": 0.3729, "step": 6204 }, { "epoch": 0.6038929440389295, "grad_norm": 1.4710303926524353, "learning_rate": 3.5807544846395613e-06, "loss": 0.4537, "step": 6205 }, { "epoch": 0.6039902676399027, "grad_norm": 1.4998656848535865, "learning_rate": 3.579243228826926e-06, "loss": 0.3768, "step": 6206 }, { "epoch": 0.604087591240876, "grad_norm": 1.3493881817200788, "learning_rate": 3.5777321141957666e-06, "loss": 0.2518, "step": 6207 }, { "epoch": 0.6041849148418491, "grad_norm": 1.4274298347024477, "learning_rate": 3.576221140896239e-06, "loss": 0.3759, "step": 6208 }, { "epoch": 0.6042822384428224, "grad_norm": 1.4704630175742153, "learning_rate": 3.574710309078492e-06, "loss": 0.4538, "step": 6209 }, { "epoch": 0.6043795620437956, "grad_norm": 1.4107483019875193, "learning_rate": 3.5731996188926584e-06, "loss": 0.3414, "step": 6210 }, { "epoch": 0.6044768856447689, "grad_norm": 1.470750866199934, "learning_rate": 3.571689070488854e-06, "loss": 0.4459, "step": 6211 }, { "epoch": 0.6045742092457421, "grad_norm": 1.3873175819009333, "learning_rate": 3.5701786640171853e-06, "loss": 0.2683, "step": 6212 }, { "epoch": 0.6046715328467154, "grad_norm": 2.2515251500713487, "learning_rate": 3.5686683996277417e-06, "loss": 0.5125, "step": 6213 }, { "epoch": 0.6047688564476885, "grad_norm": 1.6249584045955299, "learning_rate": 3.567158277470601e-06, "loss": 0.5648, "step": 6214 }, { "epoch": 0.6048661800486618, "grad_norm": 1.3437310594510203, "learning_rate": 3.5656482976958206e-06, "loss": 0.3504, "step": 6215 }, { "epoch": 0.604963503649635, "grad_norm": 1.311697033082678, "learning_rate": 3.5641384604534503e-06, "loss": 0.3048, "step": 6216 }, { "epoch": 0.6050608272506083, "grad_norm": 1.668230880401363, "learning_rate": 3.5626287658935254e-06, "loss": 0.516, "step": 6217 }, { "epoch": 0.6051581508515815, "grad_norm": 1.4794528666391957, "learning_rate": 3.561119214166062e-06, "loss": 0.4903, "step": 6218 }, { "epoch": 0.6052554744525548, "grad_norm": 1.4771961285666158, "learning_rate": 3.559609805421067e-06, "loss": 0.287, "step": 6219 }, { "epoch": 0.6053527980535279, "grad_norm": 1.328846724139663, "learning_rate": 3.5581005398085313e-06, "loss": 0.38, "step": 6220 }, { "epoch": 0.6054501216545012, "grad_norm": 1.2685308182535933, "learning_rate": 3.5565914174784322e-06, "loss": 0.3403, "step": 6221 }, { "epoch": 0.6055474452554744, "grad_norm": 1.3275299534408687, "learning_rate": 3.5550824385807293e-06, "loss": 0.4229, "step": 6222 }, { "epoch": 0.6056447688564477, "grad_norm": 1.6401471123191445, "learning_rate": 3.5535736032653735e-06, "loss": 0.4136, "step": 6223 }, { "epoch": 0.6057420924574209, "grad_norm": 1.5123942440671032, "learning_rate": 3.552064911682297e-06, "loss": 0.5184, "step": 6224 }, { "epoch": 0.6058394160583942, "grad_norm": 1.5317638858385545, "learning_rate": 3.550556363981422e-06, "loss": 0.5174, "step": 6225 }, { "epoch": 0.6059367396593675, "grad_norm": 1.5048847136587602, "learning_rate": 3.5490479603126498e-06, "loss": 0.4189, "step": 6226 }, { "epoch": 0.6060340632603406, "grad_norm": 1.4332614274433582, "learning_rate": 3.5475397008258744e-06, "loss": 0.5102, "step": 6227 }, { "epoch": 0.6061313868613138, "grad_norm": 1.5056757833579093, "learning_rate": 3.546031585670974e-06, "loss": 0.5698, "step": 6228 }, { "epoch": 0.6062287104622871, "grad_norm": 1.3338492507973077, "learning_rate": 3.5445236149978057e-06, "loss": 0.2297, "step": 6229 }, { "epoch": 0.6063260340632604, "grad_norm": 2.036144053476165, "learning_rate": 3.5430157889562213e-06, "loss": 0.4453, "step": 6230 }, { "epoch": 0.6064233576642336, "grad_norm": 1.6536445998139675, "learning_rate": 3.541508107696053e-06, "loss": 0.5028, "step": 6231 }, { "epoch": 0.6065206812652069, "grad_norm": 1.2233850908807473, "learning_rate": 3.5400005713671215e-06, "loss": 0.2803, "step": 6232 }, { "epoch": 0.60661800486618, "grad_norm": 1.3361003397249438, "learning_rate": 3.53849318011923e-06, "loss": 0.3615, "step": 6233 }, { "epoch": 0.6067153284671533, "grad_norm": 8.054935662399211, "learning_rate": 3.536985934102169e-06, "loss": 0.4064, "step": 6234 }, { "epoch": 0.6068126520681265, "grad_norm": 1.484965156891365, "learning_rate": 3.5354788334657174e-06, "loss": 0.5168, "step": 6235 }, { "epoch": 0.6069099756690998, "grad_norm": 1.2066530295562112, "learning_rate": 3.533971878359631e-06, "loss": 0.2411, "step": 6236 }, { "epoch": 0.607007299270073, "grad_norm": 1.4672636262199528, "learning_rate": 3.532465068933661e-06, "loss": 0.3032, "step": 6237 }, { "epoch": 0.6071046228710463, "grad_norm": 1.3348149273491243, "learning_rate": 3.530958405337539e-06, "loss": 0.3891, "step": 6238 }, { "epoch": 0.6072019464720194, "grad_norm": 1.2659906871567623, "learning_rate": 3.529451887720986e-06, "loss": 0.3701, "step": 6239 }, { "epoch": 0.6072992700729927, "grad_norm": 1.4119732423116556, "learning_rate": 3.5279455162337007e-06, "loss": 0.4224, "step": 6240 }, { "epoch": 0.6073965936739659, "grad_norm": 1.4963274342474535, "learning_rate": 3.526439291025373e-06, "loss": 0.3888, "step": 6241 }, { "epoch": 0.6074939172749392, "grad_norm": 1.2622868820698283, "learning_rate": 3.5249332122456803e-06, "loss": 0.3492, "step": 6242 }, { "epoch": 0.6075912408759124, "grad_norm": 1.4096067419917093, "learning_rate": 3.523427280044281e-06, "loss": 0.3487, "step": 6243 }, { "epoch": 0.6076885644768857, "grad_norm": 1.5095705841534075, "learning_rate": 3.5219214945708193e-06, "loss": 0.4579, "step": 6244 }, { "epoch": 0.6077858880778588, "grad_norm": 1.3840115623377025, "learning_rate": 3.520415855974928e-06, "loss": 0.3845, "step": 6245 }, { "epoch": 0.6078832116788321, "grad_norm": 1.2929143519641557, "learning_rate": 3.518910364406223e-06, "loss": 0.3106, "step": 6246 }, { "epoch": 0.6079805352798053, "grad_norm": 1.8484000710884219, "learning_rate": 3.517405020014304e-06, "loss": 0.4175, "step": 6247 }, { "epoch": 0.6080778588807786, "grad_norm": 1.578142675923983, "learning_rate": 3.51589982294876e-06, "loss": 0.5108, "step": 6248 }, { "epoch": 0.6081751824817518, "grad_norm": 1.6264769371313765, "learning_rate": 3.5143947733591633e-06, "loss": 0.5404, "step": 6249 }, { "epoch": 0.6082725060827251, "grad_norm": 1.3617419540170375, "learning_rate": 3.512889871395072e-06, "loss": 0.3561, "step": 6250 }, { "epoch": 0.6083698296836983, "grad_norm": 1.318331705438804, "learning_rate": 3.511385117206027e-06, "loss": 0.3393, "step": 6251 }, { "epoch": 0.6084671532846715, "grad_norm": 1.4329370142956308, "learning_rate": 3.509880510941558e-06, "loss": 0.3546, "step": 6252 }, { "epoch": 0.6085644768856447, "grad_norm": 1.2462490068774155, "learning_rate": 3.508376052751179e-06, "loss": 0.3055, "step": 6253 }, { "epoch": 0.608661800486618, "grad_norm": 1.6540416173807684, "learning_rate": 3.5068717427843873e-06, "loss": 0.446, "step": 6254 }, { "epoch": 0.6087591240875913, "grad_norm": 1.4402205389737914, "learning_rate": 3.5053675811906683e-06, "loss": 0.3518, "step": 6255 }, { "epoch": 0.6088564476885645, "grad_norm": 0.9093126963872747, "learning_rate": 3.5038635681194922e-06, "loss": 0.2299, "step": 6256 }, { "epoch": 0.6089537712895378, "grad_norm": 1.3714844671350945, "learning_rate": 3.502359703720313e-06, "loss": 0.4417, "step": 6257 }, { "epoch": 0.6090510948905109, "grad_norm": 1.3195576513256702, "learning_rate": 3.5008559881425703e-06, "loss": 0.2737, "step": 6258 }, { "epoch": 0.6091484184914842, "grad_norm": 1.5278386633714716, "learning_rate": 3.499352421535689e-06, "loss": 0.43, "step": 6259 }, { "epoch": 0.6092457420924574, "grad_norm": 1.1993787826470486, "learning_rate": 3.49784900404908e-06, "loss": 0.3391, "step": 6260 }, { "epoch": 0.6093430656934307, "grad_norm": 1.3114718997284942, "learning_rate": 3.4963457358321416e-06, "loss": 0.1708, "step": 6261 }, { "epoch": 0.6094403892944039, "grad_norm": 1.4589053621562125, "learning_rate": 3.494842617034249e-06, "loss": 0.2967, "step": 6262 }, { "epoch": 0.6095377128953772, "grad_norm": 1.7212200102898008, "learning_rate": 3.4933396478047702e-06, "loss": 0.3529, "step": 6263 }, { "epoch": 0.6096350364963503, "grad_norm": 1.4633618806619277, "learning_rate": 3.491836828293058e-06, "loss": 0.4818, "step": 6264 }, { "epoch": 0.6097323600973236, "grad_norm": 1.561796106099118, "learning_rate": 3.4903341586484464e-06, "loss": 0.3899, "step": 6265 }, { "epoch": 0.6098296836982968, "grad_norm": 1.3723783642732574, "learning_rate": 3.4888316390202577e-06, "loss": 0.391, "step": 6266 }, { "epoch": 0.6099270072992701, "grad_norm": 1.5967327704048786, "learning_rate": 3.487329269557797e-06, "loss": 0.5419, "step": 6267 }, { "epoch": 0.6100243309002433, "grad_norm": 1.5516854443969215, "learning_rate": 3.485827050410358e-06, "loss": 0.3598, "step": 6268 }, { "epoch": 0.6101216545012166, "grad_norm": 1.4569098291607396, "learning_rate": 3.484324981727215e-06, "loss": 0.4801, "step": 6269 }, { "epoch": 0.6102189781021898, "grad_norm": 1.4186526664249823, "learning_rate": 3.48282306365763e-06, "loss": 0.4003, "step": 6270 }, { "epoch": 0.610316301703163, "grad_norm": 1.1813037234406614, "learning_rate": 3.4813212963508514e-06, "loss": 0.3108, "step": 6271 }, { "epoch": 0.6104136253041362, "grad_norm": 1.5961683288567687, "learning_rate": 3.4798196799561067e-06, "loss": 0.3287, "step": 6272 }, { "epoch": 0.6105109489051095, "grad_norm": 1.3929563546391415, "learning_rate": 3.478318214622616e-06, "loss": 0.3976, "step": 6273 }, { "epoch": 0.6106082725060827, "grad_norm": 1.5244730242140714, "learning_rate": 3.476816900499578e-06, "loss": 0.2579, "step": 6274 }, { "epoch": 0.610705596107056, "grad_norm": 1.4662098935521044, "learning_rate": 3.4753157377361837e-06, "loss": 0.2799, "step": 6275 }, { "epoch": 0.6108029197080292, "grad_norm": 1.4801693414773403, "learning_rate": 3.473814726481599e-06, "loss": 0.2896, "step": 6276 }, { "epoch": 0.6109002433090024, "grad_norm": 1.5415807734438165, "learning_rate": 3.4723138668849837e-06, "loss": 0.3406, "step": 6277 }, { "epoch": 0.6109975669099756, "grad_norm": 1.413446450265214, "learning_rate": 3.4708131590954787e-06, "loss": 0.1766, "step": 6278 }, { "epoch": 0.6110948905109489, "grad_norm": 1.6088417428984825, "learning_rate": 3.4693126032622105e-06, "loss": 0.5242, "step": 6279 }, { "epoch": 0.6111922141119221, "grad_norm": 1.441357619542961, "learning_rate": 3.467812199534289e-06, "loss": 0.4387, "step": 6280 }, { "epoch": 0.6112895377128954, "grad_norm": 1.4570273227996673, "learning_rate": 3.466311948060811e-06, "loss": 0.4021, "step": 6281 }, { "epoch": 0.6113868613138687, "grad_norm": 1.4930653810793868, "learning_rate": 3.4648118489908588e-06, "loss": 0.4825, "step": 6282 }, { "epoch": 0.6114841849148418, "grad_norm": 1.571461726743023, "learning_rate": 3.4633119024734958e-06, "loss": 0.5449, "step": 6283 }, { "epoch": 0.611581508515815, "grad_norm": 1.2925223879470797, "learning_rate": 3.4618121086577727e-06, "loss": 0.3311, "step": 6284 }, { "epoch": 0.6116788321167883, "grad_norm": 1.6121883061512903, "learning_rate": 3.4603124676927257e-06, "loss": 0.5423, "step": 6285 }, { "epoch": 0.6117761557177616, "grad_norm": 1.3835274092947698, "learning_rate": 3.4588129797273773e-06, "loss": 0.4545, "step": 6286 }, { "epoch": 0.6118734793187348, "grad_norm": 1.6117108624515402, "learning_rate": 3.4573136449107293e-06, "loss": 0.2824, "step": 6287 }, { "epoch": 0.6119708029197081, "grad_norm": 1.5783400436280206, "learning_rate": 3.455814463391771e-06, "loss": 0.5238, "step": 6288 }, { "epoch": 0.6120681265206812, "grad_norm": 1.4494124537568134, "learning_rate": 3.4543154353194812e-06, "loss": 0.3216, "step": 6289 }, { "epoch": 0.6121654501216545, "grad_norm": 1.4527986221487594, "learning_rate": 3.4528165608428153e-06, "loss": 0.3118, "step": 6290 }, { "epoch": 0.6122627737226277, "grad_norm": 1.505068084976367, "learning_rate": 3.4513178401107184e-06, "loss": 0.2119, "step": 6291 }, { "epoch": 0.612360097323601, "grad_norm": 1.3567726393948163, "learning_rate": 3.44981927327212e-06, "loss": 0.3592, "step": 6292 }, { "epoch": 0.6124574209245742, "grad_norm": 1.5213220237915581, "learning_rate": 3.448320860475934e-06, "loss": 0.5401, "step": 6293 }, { "epoch": 0.6125547445255475, "grad_norm": 1.8487148357397571, "learning_rate": 3.4468226018710577e-06, "loss": 0.4019, "step": 6294 }, { "epoch": 0.6126520681265207, "grad_norm": 1.2125102996388717, "learning_rate": 3.445324497606372e-06, "loss": 0.2799, "step": 6295 }, { "epoch": 0.6127493917274939, "grad_norm": 1.3350068358628495, "learning_rate": 3.4438265478307477e-06, "loss": 0.3789, "step": 6296 }, { "epoch": 0.6128467153284671, "grad_norm": 1.5770562206979775, "learning_rate": 3.4423287526930383e-06, "loss": 0.5942, "step": 6297 }, { "epoch": 0.6129440389294404, "grad_norm": 1.41334127506388, "learning_rate": 3.4408311123420755e-06, "loss": 0.2732, "step": 6298 }, { "epoch": 0.6130413625304136, "grad_norm": 1.649309695946923, "learning_rate": 3.4393336269266837e-06, "loss": 0.2456, "step": 6299 }, { "epoch": 0.6131386861313869, "grad_norm": 1.2926830425549862, "learning_rate": 3.4378362965956695e-06, "loss": 0.3859, "step": 6300 }, { "epoch": 0.6132360097323601, "grad_norm": 1.499433012933645, "learning_rate": 3.436339121497822e-06, "loss": 0.4412, "step": 6301 }, { "epoch": 0.6133333333333333, "grad_norm": 1.4107012342846055, "learning_rate": 3.4348421017819167e-06, "loss": 0.4779, "step": 6302 }, { "epoch": 0.6134306569343065, "grad_norm": 1.4620967080711322, "learning_rate": 3.4333452375967143e-06, "loss": 0.4425, "step": 6303 }, { "epoch": 0.6135279805352798, "grad_norm": 1.4483668041568563, "learning_rate": 3.4318485290909604e-06, "loss": 0.2765, "step": 6304 }, { "epoch": 0.613625304136253, "grad_norm": 1.4808643788761575, "learning_rate": 3.4303519764133784e-06, "loss": 0.3663, "step": 6305 }, { "epoch": 0.6137226277372263, "grad_norm": 1.4000004393713126, "learning_rate": 3.428855579712687e-06, "loss": 0.3395, "step": 6306 }, { "epoch": 0.6138199513381996, "grad_norm": 1.3367216251772667, "learning_rate": 3.4273593391375844e-06, "loss": 0.3569, "step": 6307 }, { "epoch": 0.6139172749391727, "grad_norm": 1.560089999317103, "learning_rate": 3.4258632548367487e-06, "loss": 0.4118, "step": 6308 }, { "epoch": 0.614014598540146, "grad_norm": 1.1278244167595692, "learning_rate": 3.4243673269588485e-06, "loss": 0.2091, "step": 6309 }, { "epoch": 0.6141119221411192, "grad_norm": 1.4973723124992595, "learning_rate": 3.422871555652536e-06, "loss": 0.4631, "step": 6310 }, { "epoch": 0.6142092457420925, "grad_norm": 1.4001545939921534, "learning_rate": 3.421375941066447e-06, "loss": 0.439, "step": 6311 }, { "epoch": 0.6143065693430657, "grad_norm": 1.5825897361298165, "learning_rate": 3.4198804833492004e-06, "loss": 0.4885, "step": 6312 }, { "epoch": 0.614403892944039, "grad_norm": 1.2241923764521647, "learning_rate": 3.4183851826494015e-06, "loss": 0.2927, "step": 6313 }, { "epoch": 0.6145012165450122, "grad_norm": 1.6212037566214181, "learning_rate": 3.416890039115639e-06, "loss": 0.3205, "step": 6314 }, { "epoch": 0.6145985401459854, "grad_norm": 2.2490969011082984, "learning_rate": 3.4153950528964867e-06, "loss": 0.4081, "step": 6315 }, { "epoch": 0.6146958637469586, "grad_norm": 1.259483412248355, "learning_rate": 3.4139002241405016e-06, "loss": 0.3144, "step": 6316 }, { "epoch": 0.6147931873479319, "grad_norm": 1.3599382746236823, "learning_rate": 3.4124055529962263e-06, "loss": 0.4379, "step": 6317 }, { "epoch": 0.6148905109489051, "grad_norm": 1.4671575151168392, "learning_rate": 3.4109110396121886e-06, "loss": 0.2986, "step": 6318 }, { "epoch": 0.6149878345498784, "grad_norm": 1.2928353324351105, "learning_rate": 3.409416684136896e-06, "loss": 0.3141, "step": 6319 }, { "epoch": 0.6150851581508516, "grad_norm": 1.6815782418016656, "learning_rate": 3.4079224867188447e-06, "loss": 0.5077, "step": 6320 }, { "epoch": 0.6151824817518248, "grad_norm": 2.0106035715561497, "learning_rate": 3.4064284475065148e-06, "loss": 0.6779, "step": 6321 }, { "epoch": 0.615279805352798, "grad_norm": 1.5034343821418512, "learning_rate": 3.4049345666483703e-06, "loss": 0.3309, "step": 6322 }, { "epoch": 0.6153771289537713, "grad_norm": 1.4676413714599856, "learning_rate": 3.403440844292858e-06, "loss": 0.4523, "step": 6323 }, { "epoch": 0.6154744525547445, "grad_norm": 1.272720041192553, "learning_rate": 3.401947280588409e-06, "loss": 0.285, "step": 6324 }, { "epoch": 0.6155717761557178, "grad_norm": 2.0024256302147694, "learning_rate": 3.400453875683442e-06, "loss": 0.2769, "step": 6325 }, { "epoch": 0.615669099756691, "grad_norm": 1.416829810378921, "learning_rate": 3.3989606297263576e-06, "loss": 0.2241, "step": 6326 }, { "epoch": 0.6157664233576642, "grad_norm": 1.4792782468305348, "learning_rate": 3.397467542865538e-06, "loss": 0.4518, "step": 6327 }, { "epoch": 0.6158637469586374, "grad_norm": 1.3309962483015716, "learning_rate": 3.395974615249354e-06, "loss": 0.4229, "step": 6328 }, { "epoch": 0.6159610705596107, "grad_norm": 1.5278524736967327, "learning_rate": 3.3944818470261604e-06, "loss": 0.3332, "step": 6329 }, { "epoch": 0.6160583941605839, "grad_norm": 1.5674212204454987, "learning_rate": 3.39298923834429e-06, "loss": 0.4768, "step": 6330 }, { "epoch": 0.6161557177615572, "grad_norm": 1.7051468493407753, "learning_rate": 3.3914967893520673e-06, "loss": 0.3586, "step": 6331 }, { "epoch": 0.6162530413625305, "grad_norm": 1.6596272422325444, "learning_rate": 3.390004500197797e-06, "loss": 0.4529, "step": 6332 }, { "epoch": 0.6163503649635036, "grad_norm": 1.4436935017417774, "learning_rate": 3.38851237102977e-06, "loss": 0.2779, "step": 6333 }, { "epoch": 0.6164476885644768, "grad_norm": 1.7240503599684307, "learning_rate": 3.3870204019962583e-06, "loss": 0.4542, "step": 6334 }, { "epoch": 0.6165450121654501, "grad_norm": 1.4194391366635841, "learning_rate": 3.3855285932455204e-06, "loss": 0.37, "step": 6335 }, { "epoch": 0.6166423357664234, "grad_norm": 1.4749399787199533, "learning_rate": 3.3840369449258005e-06, "loss": 0.478, "step": 6336 }, { "epoch": 0.6167396593673966, "grad_norm": 1.0682405339452568, "learning_rate": 3.3825454571853213e-06, "loss": 0.2321, "step": 6337 }, { "epoch": 0.6168369829683699, "grad_norm": 1.122839123956751, "learning_rate": 3.3810541301722932e-06, "loss": 0.2402, "step": 6338 }, { "epoch": 0.6169343065693431, "grad_norm": 1.485590707963305, "learning_rate": 3.3795629640349127e-06, "loss": 0.2774, "step": 6339 }, { "epoch": 0.6170316301703163, "grad_norm": 1.5247485657081794, "learning_rate": 3.378071958921357e-06, "loss": 0.4048, "step": 6340 }, { "epoch": 0.6171289537712895, "grad_norm": 2.2602022716848693, "learning_rate": 3.3765811149797866e-06, "loss": 0.1987, "step": 6341 }, { "epoch": 0.6172262773722628, "grad_norm": 1.862554444406962, "learning_rate": 3.375090432358349e-06, "loss": 0.374, "step": 6342 }, { "epoch": 0.617323600973236, "grad_norm": 1.2615712082519541, "learning_rate": 3.373599911205173e-06, "loss": 0.3411, "step": 6343 }, { "epoch": 0.6174209245742093, "grad_norm": 1.5993289911376303, "learning_rate": 3.3721095516683745e-06, "loss": 0.3269, "step": 6344 }, { "epoch": 0.6175182481751825, "grad_norm": 1.3306415725614866, "learning_rate": 3.3706193538960497e-06, "loss": 0.1994, "step": 6345 }, { "epoch": 0.6176155717761557, "grad_norm": 2.021754680750299, "learning_rate": 3.3691293180362817e-06, "loss": 0.4255, "step": 6346 }, { "epoch": 0.6177128953771289, "grad_norm": 1.6757927740807863, "learning_rate": 3.3676394442371363e-06, "loss": 0.3938, "step": 6347 }, { "epoch": 0.6178102189781022, "grad_norm": 1.5856184615021809, "learning_rate": 3.366149732646661e-06, "loss": 0.336, "step": 6348 }, { "epoch": 0.6179075425790754, "grad_norm": 1.2525495278013308, "learning_rate": 3.3646601834128924e-06, "loss": 0.2742, "step": 6349 }, { "epoch": 0.6180048661800487, "grad_norm": 2.6474231138943267, "learning_rate": 3.3631707966838455e-06, "loss": 0.2969, "step": 6350 }, { "epoch": 0.6181021897810219, "grad_norm": 1.4764349342423693, "learning_rate": 3.3616815726075246e-06, "loss": 0.4025, "step": 6351 }, { "epoch": 0.6181995133819951, "grad_norm": 1.516216909703192, "learning_rate": 3.360192511331911e-06, "loss": 0.4357, "step": 6352 }, { "epoch": 0.6182968369829683, "grad_norm": 1.5645375508781854, "learning_rate": 3.3587036130049755e-06, "loss": 0.4842, "step": 6353 }, { "epoch": 0.6183941605839416, "grad_norm": 1.6055309719827424, "learning_rate": 3.3572148777746725e-06, "loss": 0.3949, "step": 6354 }, { "epoch": 0.6184914841849148, "grad_norm": 1.4187572798353119, "learning_rate": 3.355726305788935e-06, "loss": 0.347, "step": 6355 }, { "epoch": 0.6185888077858881, "grad_norm": 1.4852764404080314, "learning_rate": 3.3542378971956855e-06, "loss": 0.4636, "step": 6356 }, { "epoch": 0.6186861313868613, "grad_norm": 1.4680545865026327, "learning_rate": 3.352749652142827e-06, "loss": 0.3263, "step": 6357 }, { "epoch": 0.6187834549878346, "grad_norm": 1.2909199596978085, "learning_rate": 3.35126157077825e-06, "loss": 0.221, "step": 6358 }, { "epoch": 0.6188807785888077, "grad_norm": 1.2858512877329278, "learning_rate": 3.3497736532498228e-06, "loss": 0.4425, "step": 6359 }, { "epoch": 0.618978102189781, "grad_norm": 1.4679780920197352, "learning_rate": 3.348285899705402e-06, "loss": 0.3828, "step": 6360 }, { "epoch": 0.6190754257907543, "grad_norm": 1.4904109905171448, "learning_rate": 3.3467983102928264e-06, "loss": 0.5438, "step": 6361 }, { "epoch": 0.6191727493917275, "grad_norm": 1.5466551961692698, "learning_rate": 3.345310885159921e-06, "loss": 0.4711, "step": 6362 }, { "epoch": 0.6192700729927008, "grad_norm": 1.6000010251995618, "learning_rate": 3.3438236244544876e-06, "loss": 0.4009, "step": 6363 }, { "epoch": 0.619367396593674, "grad_norm": 1.380310294745672, "learning_rate": 3.342336528324318e-06, "loss": 0.3238, "step": 6364 }, { "epoch": 0.6194647201946472, "grad_norm": 1.4194772646038305, "learning_rate": 3.3408495969171895e-06, "loss": 0.277, "step": 6365 }, { "epoch": 0.6195620437956204, "grad_norm": 1.3779067180187112, "learning_rate": 3.3393628303808546e-06, "loss": 0.417, "step": 6366 }, { "epoch": 0.6196593673965937, "grad_norm": 1.5475185096139097, "learning_rate": 3.337876228863055e-06, "loss": 0.549, "step": 6367 }, { "epoch": 0.6197566909975669, "grad_norm": 1.601875683936525, "learning_rate": 3.3363897925115174e-06, "loss": 0.578, "step": 6368 }, { "epoch": 0.6198540145985402, "grad_norm": 1.485607719529514, "learning_rate": 3.334903521473949e-06, "loss": 0.254, "step": 6369 }, { "epoch": 0.6199513381995134, "grad_norm": 1.6975560965840877, "learning_rate": 3.33341741589804e-06, "loss": 0.6447, "step": 6370 }, { "epoch": 0.6200486618004866, "grad_norm": 1.5440973331279908, "learning_rate": 3.331931475931467e-06, "loss": 0.533, "step": 6371 }, { "epoch": 0.6201459854014598, "grad_norm": 1.489907965153857, "learning_rate": 3.33044570172189e-06, "loss": 0.4217, "step": 6372 }, { "epoch": 0.6202433090024331, "grad_norm": 1.3357390317671818, "learning_rate": 3.3289600934169488e-06, "loss": 0.3263, "step": 6373 }, { "epoch": 0.6203406326034063, "grad_norm": 1.405734158318971, "learning_rate": 3.327474651164268e-06, "loss": 0.4839, "step": 6374 }, { "epoch": 0.6204379562043796, "grad_norm": 1.72369789238416, "learning_rate": 3.3259893751114607e-06, "loss": 0.3278, "step": 6375 }, { "epoch": 0.6205352798053528, "grad_norm": 1.6631384777969438, "learning_rate": 3.32450426540612e-06, "loss": 0.4216, "step": 6376 }, { "epoch": 0.6206326034063261, "grad_norm": 1.306876624538264, "learning_rate": 3.3230193221958185e-06, "loss": 0.3602, "step": 6377 }, { "epoch": 0.6207299270072992, "grad_norm": 1.8326922365035092, "learning_rate": 3.321534545628118e-06, "loss": 0.3393, "step": 6378 }, { "epoch": 0.6208272506082725, "grad_norm": 1.3059504443839534, "learning_rate": 3.3200499358505612e-06, "loss": 0.3376, "step": 6379 }, { "epoch": 0.6209245742092457, "grad_norm": 1.2028159202790685, "learning_rate": 3.318565493010676e-06, "loss": 0.3857, "step": 6380 }, { "epoch": 0.621021897810219, "grad_norm": 1.5256994425713408, "learning_rate": 3.3170812172559695e-06, "loss": 0.5269, "step": 6381 }, { "epoch": 0.6211192214111922, "grad_norm": 1.5735523716078301, "learning_rate": 3.3155971087339373e-06, "loss": 0.5115, "step": 6382 }, { "epoch": 0.6212165450121655, "grad_norm": 1.480129233208057, "learning_rate": 3.314113167592058e-06, "loss": 0.2543, "step": 6383 }, { "epoch": 0.6213138686131386, "grad_norm": 1.4854674485324186, "learning_rate": 3.3126293939777865e-06, "loss": 0.4638, "step": 6384 }, { "epoch": 0.6214111922141119, "grad_norm": 1.6127885338551649, "learning_rate": 3.311145788038569e-06, "loss": 0.1873, "step": 6385 }, { "epoch": 0.6215085158150851, "grad_norm": 1.408793543876312, "learning_rate": 3.309662349921834e-06, "loss": 0.4188, "step": 6386 }, { "epoch": 0.6216058394160584, "grad_norm": 1.312927812153489, "learning_rate": 3.3081790797749915e-06, "loss": 0.3685, "step": 6387 }, { "epoch": 0.6217031630170317, "grad_norm": 1.286380080753042, "learning_rate": 3.3066959777454324e-06, "loss": 0.3, "step": 6388 }, { "epoch": 0.6218004866180049, "grad_norm": 1.4640736093985107, "learning_rate": 3.305213043980534e-06, "loss": 0.2843, "step": 6389 }, { "epoch": 0.621897810218978, "grad_norm": 1.601117476995483, "learning_rate": 3.3037302786276584e-06, "loss": 0.3321, "step": 6390 }, { "epoch": 0.6219951338199513, "grad_norm": 1.5629534254206616, "learning_rate": 3.3022476818341466e-06, "loss": 0.4059, "step": 6391 }, { "epoch": 0.6220924574209246, "grad_norm": 1.375427613090092, "learning_rate": 3.300765253747327e-06, "loss": 0.376, "step": 6392 }, { "epoch": 0.6221897810218978, "grad_norm": 1.520247028505382, "learning_rate": 3.2992829945145076e-06, "loss": 0.4351, "step": 6393 }, { "epoch": 0.6222871046228711, "grad_norm": 1.6734811073697942, "learning_rate": 3.2978009042829843e-06, "loss": 0.3744, "step": 6394 }, { "epoch": 0.6223844282238443, "grad_norm": 1.4250547465465695, "learning_rate": 3.2963189832000286e-06, "loss": 0.3529, "step": 6395 }, { "epoch": 0.6224817518248175, "grad_norm": 1.5534281014660964, "learning_rate": 3.294837231412904e-06, "loss": 0.4346, "step": 6396 }, { "epoch": 0.6225790754257907, "grad_norm": 1.5199040051313941, "learning_rate": 3.2933556490688515e-06, "loss": 0.552, "step": 6397 }, { "epoch": 0.622676399026764, "grad_norm": 1.4811874698365435, "learning_rate": 3.2918742363150996e-06, "loss": 0.4781, "step": 6398 }, { "epoch": 0.6227737226277372, "grad_norm": 1.5829534409520465, "learning_rate": 3.290392993298852e-06, "loss": 0.4083, "step": 6399 }, { "epoch": 0.6228710462287105, "grad_norm": 1.4110587235903005, "learning_rate": 3.2889119201673043e-06, "loss": 0.447, "step": 6400 }, { "epoch": 0.6229683698296837, "grad_norm": 1.4361938679402597, "learning_rate": 3.2874310170676316e-06, "loss": 0.4102, "step": 6401 }, { "epoch": 0.623065693430657, "grad_norm": 1.6223143612737465, "learning_rate": 3.28595028414699e-06, "loss": 0.4837, "step": 6402 }, { "epoch": 0.6231630170316301, "grad_norm": 1.4197191935920161, "learning_rate": 3.2844697215525224e-06, "loss": 0.237, "step": 6403 }, { "epoch": 0.6232603406326034, "grad_norm": 1.5890665990125814, "learning_rate": 3.282989329431353e-06, "loss": 0.5402, "step": 6404 }, { "epoch": 0.6233576642335766, "grad_norm": 1.4318873256650708, "learning_rate": 3.28150910793059e-06, "loss": 0.4189, "step": 6405 }, { "epoch": 0.6234549878345499, "grad_norm": 1.2893337542343504, "learning_rate": 3.2800290571973225e-06, "loss": 0.2076, "step": 6406 }, { "epoch": 0.6235523114355231, "grad_norm": 1.7394992632928765, "learning_rate": 3.278549177378625e-06, "loss": 0.4858, "step": 6407 }, { "epoch": 0.6236496350364964, "grad_norm": 1.7877530299008864, "learning_rate": 3.2770694686215555e-06, "loss": 0.3758, "step": 6408 }, { "epoch": 0.6237469586374695, "grad_norm": 1.3889300700546277, "learning_rate": 3.27558993107315e-06, "loss": 0.3385, "step": 6409 }, { "epoch": 0.6238442822384428, "grad_norm": 1.5311081489721348, "learning_rate": 3.2741105648804326e-06, "loss": 0.5021, "step": 6410 }, { "epoch": 0.623941605839416, "grad_norm": 1.6357991769456734, "learning_rate": 3.27263137019041e-06, "loss": 0.3163, "step": 6411 }, { "epoch": 0.6240389294403893, "grad_norm": 1.5531807503777904, "learning_rate": 3.27115234715007e-06, "loss": 0.4715, "step": 6412 }, { "epoch": 0.6241362530413626, "grad_norm": 1.595618256234525, "learning_rate": 3.2696734959063836e-06, "loss": 0.2676, "step": 6413 }, { "epoch": 0.6242335766423358, "grad_norm": 1.2586532057790611, "learning_rate": 3.268194816606305e-06, "loss": 0.3499, "step": 6414 }, { "epoch": 0.624330900243309, "grad_norm": 1.3478748089021555, "learning_rate": 3.266716309396772e-06, "loss": 0.3094, "step": 6415 }, { "epoch": 0.6244282238442822, "grad_norm": 1.6488441580377198, "learning_rate": 3.2652379744247053e-06, "loss": 0.2998, "step": 6416 }, { "epoch": 0.6245255474452555, "grad_norm": 1.6076493872823139, "learning_rate": 3.2637598118370075e-06, "loss": 0.3228, "step": 6417 }, { "epoch": 0.6246228710462287, "grad_norm": 1.5158291708686529, "learning_rate": 3.2622818217805634e-06, "loss": 0.4085, "step": 6418 }, { "epoch": 0.624720194647202, "grad_norm": 1.4341934155650422, "learning_rate": 3.260804004402245e-06, "loss": 0.2319, "step": 6419 }, { "epoch": 0.6248175182481752, "grad_norm": 1.663752280677247, "learning_rate": 3.2593263598489e-06, "loss": 0.4452, "step": 6420 }, { "epoch": 0.6249148418491485, "grad_norm": 1.2769632035094298, "learning_rate": 3.257848888267364e-06, "loss": 0.3209, "step": 6421 }, { "epoch": 0.6250121654501216, "grad_norm": 1.4916826762329467, "learning_rate": 3.256371589804455e-06, "loss": 0.4286, "step": 6422 }, { "epoch": 0.6251094890510949, "grad_norm": 1.36549468826328, "learning_rate": 3.2548944646069743e-06, "loss": 0.4467, "step": 6423 }, { "epoch": 0.6252068126520681, "grad_norm": 1.3602976682219388, "learning_rate": 3.2534175128217016e-06, "loss": 0.4956, "step": 6424 }, { "epoch": 0.6253041362530414, "grad_norm": 1.4692369830606595, "learning_rate": 3.2519407345954048e-06, "loss": 0.4828, "step": 6425 }, { "epoch": 0.6254014598540146, "grad_norm": 1.0896644581534791, "learning_rate": 3.2504641300748317e-06, "loss": 0.2557, "step": 6426 }, { "epoch": 0.6254987834549879, "grad_norm": 1.3661247264391625, "learning_rate": 3.2489876994067127e-06, "loss": 0.3301, "step": 6427 }, { "epoch": 0.625596107055961, "grad_norm": 1.6193965254088782, "learning_rate": 3.2475114427377628e-06, "loss": 0.5323, "step": 6428 }, { "epoch": 0.6256934306569343, "grad_norm": 1.58042754846969, "learning_rate": 3.246035360214678e-06, "loss": 0.3914, "step": 6429 }, { "epoch": 0.6257907542579075, "grad_norm": 1.5398264117065557, "learning_rate": 3.244559451984139e-06, "loss": 0.3135, "step": 6430 }, { "epoch": 0.6258880778588808, "grad_norm": 1.393062042004862, "learning_rate": 3.243083718192804e-06, "loss": 0.4127, "step": 6431 }, { "epoch": 0.625985401459854, "grad_norm": 1.374180419456703, "learning_rate": 3.2416081589873204e-06, "loss": 0.4203, "step": 6432 }, { "epoch": 0.6260827250608273, "grad_norm": 1.5221690056463797, "learning_rate": 3.240132774514314e-06, "loss": 0.3567, "step": 6433 }, { "epoch": 0.6261800486618004, "grad_norm": 1.7565693167917438, "learning_rate": 3.238657564920398e-06, "loss": 0.4339, "step": 6434 }, { "epoch": 0.6262773722627737, "grad_norm": 1.4458477781748666, "learning_rate": 3.2371825303521608e-06, "loss": 0.3381, "step": 6435 }, { "epoch": 0.6263746958637469, "grad_norm": 1.0579358974139734, "learning_rate": 3.2357076709561785e-06, "loss": 0.2131, "step": 6436 }, { "epoch": 0.6264720194647202, "grad_norm": 1.411446443439624, "learning_rate": 3.2342329868790113e-06, "loss": 0.3913, "step": 6437 }, { "epoch": 0.6265693430656935, "grad_norm": 1.6804640992905848, "learning_rate": 3.2327584782671954e-06, "loss": 0.4483, "step": 6438 }, { "epoch": 0.6266666666666667, "grad_norm": 1.3697314327713126, "learning_rate": 3.2312841452672565e-06, "loss": 0.4149, "step": 6439 }, { "epoch": 0.6267639902676398, "grad_norm": 1.4377594589269822, "learning_rate": 3.2298099880256996e-06, "loss": 0.2782, "step": 6440 }, { "epoch": 0.6268613138686131, "grad_norm": 1.2400485642982848, "learning_rate": 3.2283360066890135e-06, "loss": 0.3476, "step": 6441 }, { "epoch": 0.6269586374695864, "grad_norm": 1.4012086147216676, "learning_rate": 3.2268622014036654e-06, "loss": 0.3381, "step": 6442 }, { "epoch": 0.6270559610705596, "grad_norm": 1.9163727811432625, "learning_rate": 3.22538857231611e-06, "loss": 0.2783, "step": 6443 }, { "epoch": 0.6271532846715329, "grad_norm": 1.5427758977580395, "learning_rate": 3.2239151195727824e-06, "loss": 0.445, "step": 6444 }, { "epoch": 0.6272506082725061, "grad_norm": 1.3443867261615499, "learning_rate": 3.2224418433201036e-06, "loss": 0.3202, "step": 6445 }, { "epoch": 0.6273479318734794, "grad_norm": 1.4345988977751094, "learning_rate": 3.22096874370447e-06, "loss": 0.4615, "step": 6446 }, { "epoch": 0.6274452554744525, "grad_norm": 1.5837685923700648, "learning_rate": 3.2194958208722656e-06, "loss": 0.4776, "step": 6447 }, { "epoch": 0.6275425790754258, "grad_norm": 1.3815783149316243, "learning_rate": 3.2180230749698565e-06, "loss": 0.41, "step": 6448 }, { "epoch": 0.627639902676399, "grad_norm": 1.237087793767247, "learning_rate": 3.2165505061435887e-06, "loss": 0.2476, "step": 6449 }, { "epoch": 0.6277372262773723, "grad_norm": 1.2584796818308166, "learning_rate": 3.2150781145397937e-06, "loss": 0.3838, "step": 6450 }, { "epoch": 0.6278345498783455, "grad_norm": 1.5613599051036826, "learning_rate": 3.213605900304784e-06, "loss": 0.3109, "step": 6451 }, { "epoch": 0.6279318734793188, "grad_norm": 1.2250805302833447, "learning_rate": 3.2121338635848553e-06, "loss": 0.2798, "step": 6452 }, { "epoch": 0.6280291970802919, "grad_norm": 1.6756000554221138, "learning_rate": 3.2106620045262813e-06, "loss": 0.3721, "step": 6453 }, { "epoch": 0.6281265206812652, "grad_norm": 1.3234677506089279, "learning_rate": 3.209190323275323e-06, "loss": 0.2681, "step": 6454 }, { "epoch": 0.6282238442822384, "grad_norm": 1.5906825746559898, "learning_rate": 3.207718819978226e-06, "loss": 0.4974, "step": 6455 }, { "epoch": 0.6283211678832117, "grad_norm": 1.5094469925986025, "learning_rate": 3.2062474947812096e-06, "loss": 0.3998, "step": 6456 }, { "epoch": 0.6284184914841849, "grad_norm": 1.5482315126630701, "learning_rate": 3.204776347830482e-06, "loss": 0.5504, "step": 6457 }, { "epoch": 0.6285158150851582, "grad_norm": 1.3480322271903677, "learning_rate": 3.2033053792722326e-06, "loss": 0.2613, "step": 6458 }, { "epoch": 0.6286131386861313, "grad_norm": 1.5305125737986958, "learning_rate": 3.2018345892526327e-06, "loss": 0.3946, "step": 6459 }, { "epoch": 0.6287104622871046, "grad_norm": 1.3806290279276137, "learning_rate": 3.2003639779178334e-06, "loss": 0.3694, "step": 6460 }, { "epoch": 0.6288077858880778, "grad_norm": 1.6224067977444971, "learning_rate": 3.1988935454139723e-06, "loss": 0.5805, "step": 6461 }, { "epoch": 0.6289051094890511, "grad_norm": 1.5657592350411667, "learning_rate": 3.1974232918871666e-06, "loss": 0.317, "step": 6462 }, { "epoch": 0.6290024330900243, "grad_norm": 3.5127424163247816, "learning_rate": 3.1959532174835186e-06, "loss": 0.5635, "step": 6463 }, { "epoch": 0.6290997566909976, "grad_norm": 1.2858706636545765, "learning_rate": 3.1944833223491046e-06, "loss": 0.2784, "step": 6464 }, { "epoch": 0.6291970802919709, "grad_norm": 1.3540634213727698, "learning_rate": 3.1930136066299945e-06, "loss": 0.3003, "step": 6465 }, { "epoch": 0.629294403892944, "grad_norm": 1.170333153169365, "learning_rate": 3.1915440704722343e-06, "loss": 0.323, "step": 6466 }, { "epoch": 0.6293917274939173, "grad_norm": 1.3124886466852246, "learning_rate": 3.190074714021849e-06, "loss": 0.321, "step": 6467 }, { "epoch": 0.6294890510948905, "grad_norm": 1.6243528397895342, "learning_rate": 3.1886055374248526e-06, "loss": 0.2533, "step": 6468 }, { "epoch": 0.6295863746958638, "grad_norm": 1.4883999514363708, "learning_rate": 3.187136540827237e-06, "loss": 0.3649, "step": 6469 }, { "epoch": 0.629683698296837, "grad_norm": 1.427487040125178, "learning_rate": 3.185667724374979e-06, "loss": 0.3898, "step": 6470 }, { "epoch": 0.6297810218978103, "grad_norm": 1.115881950643076, "learning_rate": 3.184199088214033e-06, "loss": 0.2074, "step": 6471 }, { "epoch": 0.6298783454987834, "grad_norm": 1.6919065797054764, "learning_rate": 3.1827306324903395e-06, "loss": 0.2688, "step": 6472 }, { "epoch": 0.6299756690997567, "grad_norm": 1.3599466897529593, "learning_rate": 3.1812623573498226e-06, "loss": 0.3131, "step": 6473 }, { "epoch": 0.6300729927007299, "grad_norm": 1.3631234063966051, "learning_rate": 3.1797942629383793e-06, "loss": 0.2478, "step": 6474 }, { "epoch": 0.6301703163017032, "grad_norm": 1.7283042994052793, "learning_rate": 3.1783263494019005e-06, "loss": 0.3768, "step": 6475 }, { "epoch": 0.6302676399026764, "grad_norm": 1.4281723248342992, "learning_rate": 3.1768586168862525e-06, "loss": 0.3077, "step": 6476 }, { "epoch": 0.6303649635036497, "grad_norm": 2.900169985820706, "learning_rate": 3.1753910655372855e-06, "loss": 0.3586, "step": 6477 }, { "epoch": 0.6304622871046228, "grad_norm": 1.672337334424629, "learning_rate": 3.173923695500828e-06, "loss": 0.435, "step": 6478 }, { "epoch": 0.6305596107055961, "grad_norm": 1.5938101364458683, "learning_rate": 3.1724565069226955e-06, "loss": 0.5215, "step": 6479 }, { "epoch": 0.6306569343065693, "grad_norm": 1.4436062200383195, "learning_rate": 3.1709894999486828e-06, "loss": 0.4094, "step": 6480 }, { "epoch": 0.6307542579075426, "grad_norm": 1.4529861876112795, "learning_rate": 3.1695226747245687e-06, "loss": 0.2817, "step": 6481 }, { "epoch": 0.6308515815085158, "grad_norm": 1.5483022713531014, "learning_rate": 3.168056031396111e-06, "loss": 0.3348, "step": 6482 }, { "epoch": 0.6309489051094891, "grad_norm": 1.292264164376706, "learning_rate": 3.1665895701090516e-06, "loss": 0.4504, "step": 6483 }, { "epoch": 0.6310462287104623, "grad_norm": 1.2803892640445205, "learning_rate": 3.165123291009114e-06, "loss": 0.1939, "step": 6484 }, { "epoch": 0.6311435523114355, "grad_norm": 1.6429724475131509, "learning_rate": 3.163657194242002e-06, "loss": 0.3441, "step": 6485 }, { "epoch": 0.6312408759124087, "grad_norm": 1.3659913852581975, "learning_rate": 3.162191279953403e-06, "loss": 0.3171, "step": 6486 }, { "epoch": 0.631338199513382, "grad_norm": 1.2837470772774027, "learning_rate": 3.1607255482889865e-06, "loss": 0.1758, "step": 6487 }, { "epoch": 0.6314355231143552, "grad_norm": 1.6139208100897777, "learning_rate": 3.159259999394405e-06, "loss": 0.4519, "step": 6488 }, { "epoch": 0.6315328467153285, "grad_norm": 1.7475421539155827, "learning_rate": 3.1577946334152867e-06, "loss": 0.2733, "step": 6489 }, { "epoch": 0.6316301703163018, "grad_norm": 1.4912354792665754, "learning_rate": 3.1563294504972474e-06, "loss": 0.4201, "step": 6490 }, { "epoch": 0.6317274939172749, "grad_norm": 1.6382570162150927, "learning_rate": 3.154864450785885e-06, "loss": 0.4395, "step": 6491 }, { "epoch": 0.6318248175182481, "grad_norm": 1.5663588366859882, "learning_rate": 3.1533996344267753e-06, "loss": 0.5681, "step": 6492 }, { "epoch": 0.6319221411192214, "grad_norm": 1.4907756737266007, "learning_rate": 3.1519350015654793e-06, "loss": 0.4187, "step": 6493 }, { "epoch": 0.6320194647201947, "grad_norm": 1.4455849880382687, "learning_rate": 3.1504705523475377e-06, "loss": 0.3222, "step": 6494 }, { "epoch": 0.6321167883211679, "grad_norm": 1.4603444900127154, "learning_rate": 3.1490062869184747e-06, "loss": 0.3556, "step": 6495 }, { "epoch": 0.6322141119221412, "grad_norm": 1.5259510139212225, "learning_rate": 3.1475422054237948e-06, "loss": 0.3879, "step": 6496 }, { "epoch": 0.6323114355231143, "grad_norm": 1.4348064563236933, "learning_rate": 3.1460783080089835e-06, "loss": 0.322, "step": 6497 }, { "epoch": 0.6324087591240876, "grad_norm": 1.464928220220762, "learning_rate": 3.1446145948195104e-06, "loss": 0.2428, "step": 6498 }, { "epoch": 0.6325060827250608, "grad_norm": 2.010405176423069, "learning_rate": 3.143151066000828e-06, "loss": 0.4493, "step": 6499 }, { "epoch": 0.6326034063260341, "grad_norm": 1.6763377299382285, "learning_rate": 3.141687721698363e-06, "loss": 0.3089, "step": 6500 }, { "epoch": 0.6327007299270073, "grad_norm": 1.2933149997814406, "learning_rate": 3.140224562057532e-06, "loss": 0.2106, "step": 6501 }, { "epoch": 0.6327980535279806, "grad_norm": 1.3798692490652842, "learning_rate": 3.13876158722373e-06, "loss": 0.2742, "step": 6502 }, { "epoch": 0.6328953771289537, "grad_norm": 1.6893586413473158, "learning_rate": 3.137298797342332e-06, "loss": 0.4408, "step": 6503 }, { "epoch": 0.632992700729927, "grad_norm": 1.471219847182982, "learning_rate": 3.135836192558697e-06, "loss": 0.3318, "step": 6504 }, { "epoch": 0.6330900243309002, "grad_norm": 1.2796389057029516, "learning_rate": 3.1343737730181655e-06, "loss": 0.4114, "step": 6505 }, { "epoch": 0.6331873479318735, "grad_norm": 1.8869923142527827, "learning_rate": 3.13291153886606e-06, "loss": 0.3791, "step": 6506 }, { "epoch": 0.6332846715328467, "grad_norm": 1.4539433297784599, "learning_rate": 3.131449490247682e-06, "loss": 0.3876, "step": 6507 }, { "epoch": 0.63338199513382, "grad_norm": 1.3341134094117948, "learning_rate": 3.1299876273083164e-06, "loss": 0.3685, "step": 6508 }, { "epoch": 0.6334793187347932, "grad_norm": 1.4170620964782272, "learning_rate": 3.128525950193232e-06, "loss": 0.4013, "step": 6509 }, { "epoch": 0.6335766423357664, "grad_norm": 1.5530589406262345, "learning_rate": 3.127064459047671e-06, "loss": 0.368, "step": 6510 }, { "epoch": 0.6336739659367396, "grad_norm": 1.5483044271710225, "learning_rate": 3.125603154016867e-06, "loss": 0.4434, "step": 6511 }, { "epoch": 0.6337712895377129, "grad_norm": 1.3264657819916343, "learning_rate": 3.1241420352460296e-06, "loss": 0.3797, "step": 6512 }, { "epoch": 0.6338686131386861, "grad_norm": 1.4633775214009548, "learning_rate": 3.1226811028803514e-06, "loss": 0.3799, "step": 6513 }, { "epoch": 0.6339659367396594, "grad_norm": 1.4275809021587014, "learning_rate": 3.121220357065006e-06, "loss": 0.3724, "step": 6514 }, { "epoch": 0.6340632603406327, "grad_norm": 1.2579159901379062, "learning_rate": 3.1197597979451477e-06, "loss": 0.34, "step": 6515 }, { "epoch": 0.6341605839416058, "grad_norm": 1.767126339664398, "learning_rate": 3.118299425665914e-06, "loss": 0.5021, "step": 6516 }, { "epoch": 0.634257907542579, "grad_norm": 1.6146261230049028, "learning_rate": 3.116839240372424e-06, "loss": 0.6639, "step": 6517 }, { "epoch": 0.6343552311435523, "grad_norm": 1.5481263310931461, "learning_rate": 3.115379242209775e-06, "loss": 0.4425, "step": 6518 }, { "epoch": 0.6344525547445256, "grad_norm": 1.6350276832686754, "learning_rate": 3.1139194313230497e-06, "loss": 0.4323, "step": 6519 }, { "epoch": 0.6345498783454988, "grad_norm": 1.681221053430889, "learning_rate": 3.1124598078573115e-06, "loss": 0.2437, "step": 6520 }, { "epoch": 0.6346472019464721, "grad_norm": 1.4748162332920707, "learning_rate": 3.1110003719576005e-06, "loss": 0.37, "step": 6521 }, { "epoch": 0.6347445255474452, "grad_norm": 1.3748894993856542, "learning_rate": 3.109541123768943e-06, "loss": 0.2493, "step": 6522 }, { "epoch": 0.6348418491484185, "grad_norm": 1.4376733094936145, "learning_rate": 3.108082063436346e-06, "loss": 0.2908, "step": 6523 }, { "epoch": 0.6349391727493917, "grad_norm": 1.6379363781656653, "learning_rate": 3.1066231911047996e-06, "loss": 0.4498, "step": 6524 }, { "epoch": 0.635036496350365, "grad_norm": 1.683708381733199, "learning_rate": 3.105164506919268e-06, "loss": 0.3291, "step": 6525 }, { "epoch": 0.6351338199513382, "grad_norm": 1.4308202873054392, "learning_rate": 3.1037060110247053e-06, "loss": 0.418, "step": 6526 }, { "epoch": 0.6352311435523115, "grad_norm": 1.263260032838939, "learning_rate": 3.1022477035660413e-06, "loss": 0.2807, "step": 6527 }, { "epoch": 0.6353284671532847, "grad_norm": 1.183934128166101, "learning_rate": 3.1007895846881896e-06, "loss": 0.3428, "step": 6528 }, { "epoch": 0.6354257907542579, "grad_norm": 1.1839227502404823, "learning_rate": 3.099331654536044e-06, "loss": 0.3267, "step": 6529 }, { "epoch": 0.6355231143552311, "grad_norm": 1.462328895927904, "learning_rate": 3.0978739132544798e-06, "loss": 0.4651, "step": 6530 }, { "epoch": 0.6356204379562044, "grad_norm": 1.5613248602189018, "learning_rate": 3.0964163609883563e-06, "loss": 0.5058, "step": 6531 }, { "epoch": 0.6357177615571776, "grad_norm": 1.5514022390955555, "learning_rate": 3.094958997882507e-06, "loss": 0.4418, "step": 6532 }, { "epoch": 0.6358150851581509, "grad_norm": 1.50778032179577, "learning_rate": 3.0935018240817518e-06, "loss": 0.512, "step": 6533 }, { "epoch": 0.6359124087591241, "grad_norm": 1.3028402442750457, "learning_rate": 3.0920448397308932e-06, "loss": 0.2806, "step": 6534 }, { "epoch": 0.6360097323600973, "grad_norm": 1.4253549434913757, "learning_rate": 3.0905880449747138e-06, "loss": 0.3683, "step": 6535 }, { "epoch": 0.6361070559610705, "grad_norm": 1.6257959030642224, "learning_rate": 3.089131439957972e-06, "loss": 0.4056, "step": 6536 }, { "epoch": 0.6362043795620438, "grad_norm": 2.038223737180886, "learning_rate": 3.087675024825413e-06, "loss": 0.1905, "step": 6537 }, { "epoch": 0.636301703163017, "grad_norm": 1.4294957100055656, "learning_rate": 3.0862187997217643e-06, "loss": 0.4646, "step": 6538 }, { "epoch": 0.6363990267639903, "grad_norm": 1.5204127091474489, "learning_rate": 3.0847627647917277e-06, "loss": 0.3884, "step": 6539 }, { "epoch": 0.6364963503649635, "grad_norm": 1.470787420807994, "learning_rate": 3.0833069201799927e-06, "loss": 0.2999, "step": 6540 }, { "epoch": 0.6365936739659367, "grad_norm": 1.3680617670648643, "learning_rate": 3.0818512660312273e-06, "loss": 0.1966, "step": 6541 }, { "epoch": 0.6366909975669099, "grad_norm": 1.4095686007744734, "learning_rate": 3.0803958024900822e-06, "loss": 0.4465, "step": 6542 }, { "epoch": 0.6367883211678832, "grad_norm": 1.7020279036810935, "learning_rate": 3.078940529701183e-06, "loss": 0.3963, "step": 6543 }, { "epoch": 0.6368856447688565, "grad_norm": 1.6759286384412257, "learning_rate": 3.077485447809145e-06, "loss": 0.4328, "step": 6544 }, { "epoch": 0.6369829683698297, "grad_norm": 1.3607338000680083, "learning_rate": 3.076030556958563e-06, "loss": 0.3031, "step": 6545 }, { "epoch": 0.637080291970803, "grad_norm": 1.3654839938275702, "learning_rate": 3.0745758572940044e-06, "loss": 0.2692, "step": 6546 }, { "epoch": 0.6371776155717761, "grad_norm": 1.3285997327669636, "learning_rate": 3.073121348960026e-06, "loss": 0.4202, "step": 6547 }, { "epoch": 0.6372749391727494, "grad_norm": 1.491142266371717, "learning_rate": 3.0716670321011637e-06, "loss": 0.5475, "step": 6548 }, { "epoch": 0.6373722627737226, "grad_norm": 1.3959105266273333, "learning_rate": 3.0702129068619347e-06, "loss": 0.1799, "step": 6549 }, { "epoch": 0.6374695863746959, "grad_norm": 1.4953678451707477, "learning_rate": 3.068758973386834e-06, "loss": 0.2944, "step": 6550 }, { "epoch": 0.6375669099756691, "grad_norm": 1.3774626526382292, "learning_rate": 3.0673052318203415e-06, "loss": 0.4229, "step": 6551 }, { "epoch": 0.6376642335766424, "grad_norm": 3.0575530911418967, "learning_rate": 3.065851682306916e-06, "loss": 0.3802, "step": 6552 }, { "epoch": 0.6377615571776156, "grad_norm": 1.5266257547257827, "learning_rate": 3.0643983249910003e-06, "loss": 0.431, "step": 6553 }, { "epoch": 0.6378588807785888, "grad_norm": 1.4318729227784441, "learning_rate": 3.062945160017009e-06, "loss": 0.3329, "step": 6554 }, { "epoch": 0.637956204379562, "grad_norm": 1.477288849615939, "learning_rate": 3.0614921875293485e-06, "loss": 0.3838, "step": 6555 }, { "epoch": 0.6380535279805353, "grad_norm": 1.4366676574550201, "learning_rate": 3.0600394076724034e-06, "loss": 0.3418, "step": 6556 }, { "epoch": 0.6381508515815085, "grad_norm": 1.840787349982565, "learning_rate": 3.058586820590532e-06, "loss": 0.3175, "step": 6557 }, { "epoch": 0.6382481751824818, "grad_norm": 1.6374162084675348, "learning_rate": 3.057134426428082e-06, "loss": 0.5415, "step": 6558 }, { "epoch": 0.638345498783455, "grad_norm": 1.7509514402086164, "learning_rate": 3.055682225329378e-06, "loss": 0.3998, "step": 6559 }, { "epoch": 0.6384428223844282, "grad_norm": 1.3419721064191474, "learning_rate": 3.0542302174387285e-06, "loss": 0.3397, "step": 6560 }, { "epoch": 0.6385401459854014, "grad_norm": 1.4188823856648498, "learning_rate": 3.052778402900416e-06, "loss": 0.1553, "step": 6561 }, { "epoch": 0.6386374695863747, "grad_norm": 1.4080661490138804, "learning_rate": 3.051326781858711e-06, "loss": 0.2777, "step": 6562 }, { "epoch": 0.6387347931873479, "grad_norm": 1.5492062197364629, "learning_rate": 3.0498753544578636e-06, "loss": 0.4784, "step": 6563 }, { "epoch": 0.6388321167883212, "grad_norm": 1.8418367448295987, "learning_rate": 3.0484241208420974e-06, "loss": 0.4083, "step": 6564 }, { "epoch": 0.6389294403892944, "grad_norm": 1.6392677142297458, "learning_rate": 3.046973081155627e-06, "loss": 0.483, "step": 6565 }, { "epoch": 0.6390267639902676, "grad_norm": 1.325005041868586, "learning_rate": 3.0455222355426417e-06, "loss": 0.3442, "step": 6566 }, { "epoch": 0.6391240875912408, "grad_norm": 1.5095000631832123, "learning_rate": 3.0440715841473156e-06, "loss": 0.3789, "step": 6567 }, { "epoch": 0.6392214111922141, "grad_norm": 1.3463836410906744, "learning_rate": 3.0426211271137963e-06, "loss": 0.2513, "step": 6568 }, { "epoch": 0.6393187347931873, "grad_norm": 1.4654952978073685, "learning_rate": 3.041170864586218e-06, "loss": 0.2815, "step": 6569 }, { "epoch": 0.6394160583941606, "grad_norm": 1.2085160194064402, "learning_rate": 3.0397207967086963e-06, "loss": 0.2626, "step": 6570 }, { "epoch": 0.6395133819951339, "grad_norm": 1.2129154909198758, "learning_rate": 3.0382709236253236e-06, "loss": 0.3191, "step": 6571 }, { "epoch": 0.6396107055961071, "grad_norm": 1.2824407022461222, "learning_rate": 3.0368212454801747e-06, "loss": 0.2703, "step": 6572 }, { "epoch": 0.6397080291970803, "grad_norm": 2.2236201604685077, "learning_rate": 3.0353717624173052e-06, "loss": 0.2915, "step": 6573 }, { "epoch": 0.6398053527980535, "grad_norm": 1.6125491053474876, "learning_rate": 3.0339224745807523e-06, "loss": 0.409, "step": 6574 }, { "epoch": 0.6399026763990268, "grad_norm": 1.742793572213448, "learning_rate": 3.0324733821145303e-06, "loss": 0.4993, "step": 6575 }, { "epoch": 0.64, "grad_norm": 1.3630390115977793, "learning_rate": 3.0310244851626376e-06, "loss": 0.3173, "step": 6576 }, { "epoch": 0.6400973236009733, "grad_norm": 1.500298947108402, "learning_rate": 3.029575783869052e-06, "loss": 0.4725, "step": 6577 }, { "epoch": 0.6401946472019465, "grad_norm": 1.705190193889179, "learning_rate": 3.0281272783777343e-06, "loss": 0.3429, "step": 6578 }, { "epoch": 0.6402919708029197, "grad_norm": 1.448392648012749, "learning_rate": 3.0266789688326187e-06, "loss": 0.336, "step": 6579 }, { "epoch": 0.6403892944038929, "grad_norm": 1.3178412429464779, "learning_rate": 3.0252308553776264e-06, "loss": 0.2979, "step": 6580 }, { "epoch": 0.6404866180048662, "grad_norm": 1.3242172196190918, "learning_rate": 3.0237829381566586e-06, "loss": 0.2406, "step": 6581 }, { "epoch": 0.6405839416058394, "grad_norm": 1.2949004250615508, "learning_rate": 3.0223352173135957e-06, "loss": 0.2948, "step": 6582 }, { "epoch": 0.6406812652068127, "grad_norm": 1.391976289322491, "learning_rate": 3.020887692992297e-06, "loss": 0.2993, "step": 6583 }, { "epoch": 0.6407785888077859, "grad_norm": 1.4327058043112526, "learning_rate": 3.0194403653366046e-06, "loss": 0.4787, "step": 6584 }, { "epoch": 0.6408759124087591, "grad_norm": 1.352759211832976, "learning_rate": 3.0179932344903406e-06, "loss": 0.3548, "step": 6585 }, { "epoch": 0.6409732360097323, "grad_norm": 1.4721962288695103, "learning_rate": 3.0165463005973074e-06, "loss": 0.3982, "step": 6586 }, { "epoch": 0.6410705596107056, "grad_norm": 1.5071786131133138, "learning_rate": 3.0150995638012863e-06, "loss": 0.2905, "step": 6587 }, { "epoch": 0.6411678832116788, "grad_norm": 1.7834773275882259, "learning_rate": 3.0136530242460422e-06, "loss": 0.5121, "step": 6588 }, { "epoch": 0.6412652068126521, "grad_norm": 1.3996326305129958, "learning_rate": 3.01220668207532e-06, "loss": 0.3202, "step": 6589 }, { "epoch": 0.6413625304136253, "grad_norm": 1.3538756228954372, "learning_rate": 3.0107605374328393e-06, "loss": 0.3381, "step": 6590 }, { "epoch": 0.6414598540145985, "grad_norm": 1.7326099934416226, "learning_rate": 3.0093145904623067e-06, "loss": 0.4123, "step": 6591 }, { "epoch": 0.6415571776155717, "grad_norm": 1.5533762247005998, "learning_rate": 3.007868841307408e-06, "loss": 0.2857, "step": 6592 }, { "epoch": 0.641654501216545, "grad_norm": 1.377517562865021, "learning_rate": 3.0064232901118064e-06, "loss": 0.2983, "step": 6593 }, { "epoch": 0.6417518248175182, "grad_norm": 1.3605307565764377, "learning_rate": 3.0049779370191467e-06, "loss": 0.3055, "step": 6594 }, { "epoch": 0.6418491484184915, "grad_norm": 1.4541476353198792, "learning_rate": 3.0035327821730563e-06, "loss": 0.3364, "step": 6595 }, { "epoch": 0.6419464720194648, "grad_norm": 1.6858716101582292, "learning_rate": 3.0020878257171415e-06, "loss": 0.3286, "step": 6596 }, { "epoch": 0.642043795620438, "grad_norm": 1.2361037305309155, "learning_rate": 3.0006430677949868e-06, "loss": 0.2427, "step": 6597 }, { "epoch": 0.6421411192214111, "grad_norm": 1.5622620973672636, "learning_rate": 2.999198508550159e-06, "loss": 0.4313, "step": 6598 }, { "epoch": 0.6422384428223844, "grad_norm": 1.6444258486208945, "learning_rate": 2.997754148126205e-06, "loss": 0.472, "step": 6599 }, { "epoch": 0.6423357664233577, "grad_norm": 1.220898031314922, "learning_rate": 2.9963099866666543e-06, "loss": 0.2771, "step": 6600 }, { "epoch": 0.6424330900243309, "grad_norm": 1.3733291010343895, "learning_rate": 2.9948660243150098e-06, "loss": 0.3123, "step": 6601 }, { "epoch": 0.6425304136253042, "grad_norm": 1.3566622358073268, "learning_rate": 2.9934222612147595e-06, "loss": 0.3548, "step": 6602 }, { "epoch": 0.6426277372262774, "grad_norm": 1.4428606836553397, "learning_rate": 2.9919786975093756e-06, "loss": 0.4486, "step": 6603 }, { "epoch": 0.6427250608272506, "grad_norm": 1.331557651126881, "learning_rate": 2.9905353333423014e-06, "loss": 0.3721, "step": 6604 }, { "epoch": 0.6428223844282238, "grad_norm": 1.593785827832394, "learning_rate": 2.989092168856965e-06, "loss": 0.4927, "step": 6605 }, { "epoch": 0.6429197080291971, "grad_norm": 1.536653598942224, "learning_rate": 2.987649204196777e-06, "loss": 0.3153, "step": 6606 }, { "epoch": 0.6430170316301703, "grad_norm": 1.3054514036276021, "learning_rate": 2.9862064395051248e-06, "loss": 0.3167, "step": 6607 }, { "epoch": 0.6431143552311436, "grad_norm": 1.757420067956881, "learning_rate": 2.984763874925376e-06, "loss": 0.4285, "step": 6608 }, { "epoch": 0.6432116788321168, "grad_norm": 1.2420031332753536, "learning_rate": 2.9833215106008794e-06, "loss": 0.173, "step": 6609 }, { "epoch": 0.64330900243309, "grad_norm": 1.4602255488933027, "learning_rate": 2.981879346674965e-06, "loss": 0.3907, "step": 6610 }, { "epoch": 0.6434063260340632, "grad_norm": 6.231283621281225, "learning_rate": 2.9804373832909394e-06, "loss": 0.3185, "step": 6611 }, { "epoch": 0.6435036496350365, "grad_norm": 1.478715809985492, "learning_rate": 2.978995620592092e-06, "loss": 0.4662, "step": 6612 }, { "epoch": 0.6436009732360097, "grad_norm": 1.4846465816009955, "learning_rate": 2.9775540587216912e-06, "loss": 0.3263, "step": 6613 }, { "epoch": 0.643698296836983, "grad_norm": 1.2943716131485596, "learning_rate": 2.9761126978229895e-06, "loss": 0.3322, "step": 6614 }, { "epoch": 0.6437956204379562, "grad_norm": 1.3583973844127837, "learning_rate": 2.9746715380392112e-06, "loss": 0.4017, "step": 6615 }, { "epoch": 0.6438929440389295, "grad_norm": 1.1103818381014245, "learning_rate": 2.9732305795135665e-06, "loss": 0.3148, "step": 6616 }, { "epoch": 0.6439902676399026, "grad_norm": 1.7583755608621012, "learning_rate": 2.971789822389245e-06, "loss": 0.5661, "step": 6617 }, { "epoch": 0.6440875912408759, "grad_norm": 1.4829147384015064, "learning_rate": 2.970349266809417e-06, "loss": 0.4069, "step": 6618 }, { "epoch": 0.6441849148418491, "grad_norm": 1.4588836267455016, "learning_rate": 2.9689089129172285e-06, "loss": 0.3918, "step": 6619 }, { "epoch": 0.6442822384428224, "grad_norm": 1.1937496964219592, "learning_rate": 2.9674687608558096e-06, "loss": 0.2991, "step": 6620 }, { "epoch": 0.6443795620437957, "grad_norm": 1.8283816426425703, "learning_rate": 2.966028810768271e-06, "loss": 0.3497, "step": 6621 }, { "epoch": 0.6444768856447689, "grad_norm": 1.4663545269143798, "learning_rate": 2.9645890627976987e-06, "loss": 0.5835, "step": 6622 }, { "epoch": 0.644574209245742, "grad_norm": 1.4473218816822218, "learning_rate": 2.9631495170871605e-06, "loss": 0.4357, "step": 6623 }, { "epoch": 0.6446715328467153, "grad_norm": 1.4348975964470179, "learning_rate": 2.961710173779708e-06, "loss": 0.4294, "step": 6624 }, { "epoch": 0.6447688564476886, "grad_norm": 1.3680725284863948, "learning_rate": 2.9602710330183706e-06, "loss": 0.3398, "step": 6625 }, { "epoch": 0.6448661800486618, "grad_norm": 1.5161776763951267, "learning_rate": 2.958832094946151e-06, "loss": 0.3968, "step": 6626 }, { "epoch": 0.6449635036496351, "grad_norm": 1.5178720393371483, "learning_rate": 2.957393359706042e-06, "loss": 0.3841, "step": 6627 }, { "epoch": 0.6450608272506083, "grad_norm": 1.4053519030224193, "learning_rate": 2.955954827441011e-06, "loss": 0.4372, "step": 6628 }, { "epoch": 0.6451581508515815, "grad_norm": 1.4955618687276493, "learning_rate": 2.9545164982940045e-06, "loss": 0.3427, "step": 6629 }, { "epoch": 0.6452554744525547, "grad_norm": 1.1191184156520917, "learning_rate": 2.95307837240795e-06, "loss": 0.2363, "step": 6630 }, { "epoch": 0.645352798053528, "grad_norm": 1.4559198389990236, "learning_rate": 2.9516404499257565e-06, "loss": 0.4113, "step": 6631 }, { "epoch": 0.6454501216545012, "grad_norm": 1.4156938279284876, "learning_rate": 2.9502027309903125e-06, "loss": 0.2687, "step": 6632 }, { "epoch": 0.6455474452554745, "grad_norm": 1.546264065406372, "learning_rate": 2.9487652157444803e-06, "loss": 0.5267, "step": 6633 }, { "epoch": 0.6456447688564477, "grad_norm": 1.258388837428096, "learning_rate": 2.94732790433111e-06, "loss": 0.3008, "step": 6634 }, { "epoch": 0.645742092457421, "grad_norm": 1.3070462770390554, "learning_rate": 2.9458907968930274e-06, "loss": 0.314, "step": 6635 }, { "epoch": 0.6458394160583941, "grad_norm": 1.342732865154092, "learning_rate": 2.944453893573041e-06, "loss": 0.2853, "step": 6636 }, { "epoch": 0.6459367396593674, "grad_norm": 1.2365501650372648, "learning_rate": 2.9430171945139325e-06, "loss": 0.3006, "step": 6637 }, { "epoch": 0.6460340632603406, "grad_norm": 1.4756838490224202, "learning_rate": 2.9415806998584695e-06, "loss": 0.2777, "step": 6638 }, { "epoch": 0.6461313868613139, "grad_norm": 1.8617608286928278, "learning_rate": 2.9401444097493993e-06, "loss": 0.3576, "step": 6639 }, { "epoch": 0.6462287104622871, "grad_norm": 1.7158037075222772, "learning_rate": 2.9387083243294433e-06, "loss": 0.497, "step": 6640 }, { "epoch": 0.6463260340632604, "grad_norm": 1.3471686324159067, "learning_rate": 2.937272443741309e-06, "loss": 0.273, "step": 6641 }, { "epoch": 0.6464233576642335, "grad_norm": 1.1938097614288674, "learning_rate": 2.935836768127679e-06, "loss": 0.2135, "step": 6642 }, { "epoch": 0.6465206812652068, "grad_norm": 1.2363318854541447, "learning_rate": 2.9344012976312197e-06, "loss": 0.2831, "step": 6643 }, { "epoch": 0.64661800486618, "grad_norm": 1.3109023952216936, "learning_rate": 2.932966032394572e-06, "loss": 0.333, "step": 6644 }, { "epoch": 0.6467153284671533, "grad_norm": 1.530272814294704, "learning_rate": 2.9315309725603596e-06, "loss": 0.483, "step": 6645 }, { "epoch": 0.6468126520681265, "grad_norm": 1.5315420689647377, "learning_rate": 2.9300961182711884e-06, "loss": 0.4346, "step": 6646 }, { "epoch": 0.6469099756690998, "grad_norm": 1.3411164758919616, "learning_rate": 2.9286614696696358e-06, "loss": 0.3016, "step": 6647 }, { "epoch": 0.6470072992700729, "grad_norm": 1.7287882961574295, "learning_rate": 2.9272270268982663e-06, "loss": 0.448, "step": 6648 }, { "epoch": 0.6471046228710462, "grad_norm": 1.5086159418529348, "learning_rate": 2.9257927900996216e-06, "loss": 0.4965, "step": 6649 }, { "epoch": 0.6472019464720195, "grad_norm": 1.121913171825412, "learning_rate": 2.9243587594162226e-06, "loss": 0.2167, "step": 6650 }, { "epoch": 0.6472992700729927, "grad_norm": 1.8580765237396737, "learning_rate": 2.9229249349905686e-06, "loss": 0.3295, "step": 6651 }, { "epoch": 0.647396593673966, "grad_norm": 1.5977490684949451, "learning_rate": 2.9214913169651404e-06, "loss": 0.5074, "step": 6652 }, { "epoch": 0.6474939172749392, "grad_norm": 1.3471648716381126, "learning_rate": 2.920057905482398e-06, "loss": 0.3431, "step": 6653 }, { "epoch": 0.6475912408759124, "grad_norm": 1.6787335903021845, "learning_rate": 2.9186247006847805e-06, "loss": 0.336, "step": 6654 }, { "epoch": 0.6476885644768856, "grad_norm": 1.5099697197158022, "learning_rate": 2.917191702714705e-06, "loss": 0.3717, "step": 6655 }, { "epoch": 0.6477858880778589, "grad_norm": 1.493061548659645, "learning_rate": 2.9157589117145704e-06, "loss": 0.4942, "step": 6656 }, { "epoch": 0.6478832116788321, "grad_norm": 1.414229999548491, "learning_rate": 2.9143263278267555e-06, "loss": 0.3514, "step": 6657 }, { "epoch": 0.6479805352798054, "grad_norm": 1.6119514508142312, "learning_rate": 2.912893951193614e-06, "loss": 0.3723, "step": 6658 }, { "epoch": 0.6480778588807786, "grad_norm": 1.71435984950754, "learning_rate": 2.9114617819574824e-06, "loss": 0.5102, "step": 6659 }, { "epoch": 0.6481751824817519, "grad_norm": 1.5558302623525726, "learning_rate": 2.910029820260678e-06, "loss": 0.2798, "step": 6660 }, { "epoch": 0.648272506082725, "grad_norm": 1.3469746068247404, "learning_rate": 2.9085980662454964e-06, "loss": 0.2322, "step": 6661 }, { "epoch": 0.6483698296836983, "grad_norm": 1.401344389514034, "learning_rate": 2.907166520054207e-06, "loss": 0.3496, "step": 6662 }, { "epoch": 0.6484671532846715, "grad_norm": 1.4870078579015846, "learning_rate": 2.9057351818290687e-06, "loss": 0.4631, "step": 6663 }, { "epoch": 0.6485644768856448, "grad_norm": 1.957932437958498, "learning_rate": 2.9043040517123143e-06, "loss": 0.5221, "step": 6664 }, { "epoch": 0.648661800486618, "grad_norm": 1.6613311559459771, "learning_rate": 2.9028731298461533e-06, "loss": 0.3169, "step": 6665 }, { "epoch": 0.6487591240875913, "grad_norm": 1.5105275431309184, "learning_rate": 2.901442416372777e-06, "loss": 0.4091, "step": 6666 }, { "epoch": 0.6488564476885644, "grad_norm": 1.634055070945907, "learning_rate": 2.9000119114343584e-06, "loss": 0.4286, "step": 6667 }, { "epoch": 0.6489537712895377, "grad_norm": 1.3345448334510328, "learning_rate": 2.8985816151730497e-06, "loss": 0.4469, "step": 6668 }, { "epoch": 0.6490510948905109, "grad_norm": 2.727388185889262, "learning_rate": 2.897151527730974e-06, "loss": 0.4915, "step": 6669 }, { "epoch": 0.6491484184914842, "grad_norm": 1.3341487933996297, "learning_rate": 2.895721649250244e-06, "loss": 0.21, "step": 6670 }, { "epoch": 0.6492457420924574, "grad_norm": 1.500453403612506, "learning_rate": 2.8942919798729473e-06, "loss": 0.3799, "step": 6671 }, { "epoch": 0.6493430656934307, "grad_norm": 1.4647330687352165, "learning_rate": 2.892862519741153e-06, "loss": 0.4607, "step": 6672 }, { "epoch": 0.6494403892944038, "grad_norm": 1.4687518667655608, "learning_rate": 2.8914332689969014e-06, "loss": 0.3455, "step": 6673 }, { "epoch": 0.6495377128953771, "grad_norm": 1.3535602776894515, "learning_rate": 2.890004227782224e-06, "loss": 0.3697, "step": 6674 }, { "epoch": 0.6496350364963503, "grad_norm": 1.2346898896530456, "learning_rate": 2.888575396239125e-06, "loss": 0.291, "step": 6675 }, { "epoch": 0.6497323600973236, "grad_norm": 1.2776327477031653, "learning_rate": 2.8871467745095842e-06, "loss": 0.3244, "step": 6676 }, { "epoch": 0.6498296836982969, "grad_norm": 1.402523358672398, "learning_rate": 2.8857183627355677e-06, "loss": 0.358, "step": 6677 }, { "epoch": 0.6499270072992701, "grad_norm": 1.2448787685336862, "learning_rate": 2.884290161059017e-06, "loss": 0.224, "step": 6678 }, { "epoch": 0.6500243309002434, "grad_norm": 1.4739988161959612, "learning_rate": 2.882862169621855e-06, "loss": 0.41, "step": 6679 }, { "epoch": 0.6501216545012165, "grad_norm": 1.2966540964264213, "learning_rate": 2.881434388565979e-06, "loss": 0.3083, "step": 6680 }, { "epoch": 0.6502189781021898, "grad_norm": 1.460840721800784, "learning_rate": 2.8800068180332697e-06, "loss": 0.4741, "step": 6681 }, { "epoch": 0.650316301703163, "grad_norm": 2.158897874954919, "learning_rate": 2.878579458165588e-06, "loss": 0.4813, "step": 6682 }, { "epoch": 0.6504136253041363, "grad_norm": 1.3984750071349343, "learning_rate": 2.877152309104766e-06, "loss": 0.3782, "step": 6683 }, { "epoch": 0.6505109489051095, "grad_norm": 1.326237762567292, "learning_rate": 2.8757253709926245e-06, "loss": 0.391, "step": 6684 }, { "epoch": 0.6506082725060828, "grad_norm": 1.5557193877972066, "learning_rate": 2.87429864397096e-06, "loss": 0.5827, "step": 6685 }, { "epoch": 0.6507055961070559, "grad_norm": 1.6241143087080552, "learning_rate": 2.8728721281815473e-06, "loss": 0.6017, "step": 6686 }, { "epoch": 0.6508029197080292, "grad_norm": 1.4980491031396401, "learning_rate": 2.8714458237661363e-06, "loss": 0.3771, "step": 6687 }, { "epoch": 0.6509002433090024, "grad_norm": 2.3092643164099758, "learning_rate": 2.8700197308664624e-06, "loss": 0.5363, "step": 6688 }, { "epoch": 0.6509975669099757, "grad_norm": 1.4739719655335488, "learning_rate": 2.868593849624237e-06, "loss": 0.4634, "step": 6689 }, { "epoch": 0.6510948905109489, "grad_norm": 1.5066114000013453, "learning_rate": 2.867168180181153e-06, "loss": 0.3942, "step": 6690 }, { "epoch": 0.6511922141119222, "grad_norm": 1.6017351010985514, "learning_rate": 2.865742722678876e-06, "loss": 0.388, "step": 6691 }, { "epoch": 0.6512895377128953, "grad_norm": 1.400788786613074, "learning_rate": 2.864317477259056e-06, "loss": 0.4073, "step": 6692 }, { "epoch": 0.6513868613138686, "grad_norm": 1.7401054497258985, "learning_rate": 2.862892444063321e-06, "loss": 0.2496, "step": 6693 }, { "epoch": 0.6514841849148418, "grad_norm": 1.5923507382135946, "learning_rate": 2.8614676232332776e-06, "loss": 0.3246, "step": 6694 }, { "epoch": 0.6515815085158151, "grad_norm": 1.4027517251259218, "learning_rate": 2.8600430149105106e-06, "loss": 0.382, "step": 6695 }, { "epoch": 0.6516788321167883, "grad_norm": 1.6163192113468026, "learning_rate": 2.858618619236585e-06, "loss": 0.3401, "step": 6696 }, { "epoch": 0.6517761557177616, "grad_norm": 1.4795033067342065, "learning_rate": 2.8571944363530455e-06, "loss": 0.5037, "step": 6697 }, { "epoch": 0.6518734793187347, "grad_norm": 2.0629604865916074, "learning_rate": 2.85577046640141e-06, "loss": 0.371, "step": 6698 }, { "epoch": 0.651970802919708, "grad_norm": 1.3478044069092732, "learning_rate": 2.8543467095231803e-06, "loss": 0.3012, "step": 6699 }, { "epoch": 0.6520681265206812, "grad_norm": 1.4802320789398475, "learning_rate": 2.852923165859838e-06, "loss": 0.3371, "step": 6700 }, { "epoch": 0.6521654501216545, "grad_norm": 1.6363690350905862, "learning_rate": 2.8514998355528415e-06, "loss": 0.3674, "step": 6701 }, { "epoch": 0.6522627737226278, "grad_norm": 1.260026771624302, "learning_rate": 2.850076718743625e-06, "loss": 0.3232, "step": 6702 }, { "epoch": 0.652360097323601, "grad_norm": 1.412248322337149, "learning_rate": 2.848653815573607e-06, "loss": 0.4153, "step": 6703 }, { "epoch": 0.6524574209245743, "grad_norm": 1.5690732402544092, "learning_rate": 2.847231126184181e-06, "loss": 0.5527, "step": 6704 }, { "epoch": 0.6525547445255474, "grad_norm": 1.527139167059845, "learning_rate": 2.845808650716722e-06, "loss": 0.3157, "step": 6705 }, { "epoch": 0.6526520681265207, "grad_norm": 1.6747933701558217, "learning_rate": 2.8443863893125813e-06, "loss": 0.2914, "step": 6706 }, { "epoch": 0.6527493917274939, "grad_norm": 1.5567914825053548, "learning_rate": 2.8429643421130892e-06, "loss": 0.3197, "step": 6707 }, { "epoch": 0.6528467153284672, "grad_norm": 1.5472847752203414, "learning_rate": 2.8415425092595594e-06, "loss": 0.4646, "step": 6708 }, { "epoch": 0.6529440389294404, "grad_norm": 1.6913478920540168, "learning_rate": 2.840120890893274e-06, "loss": 0.3787, "step": 6709 }, { "epoch": 0.6530413625304137, "grad_norm": 1.5849915487984703, "learning_rate": 2.838699487155504e-06, "loss": 0.4798, "step": 6710 }, { "epoch": 0.6531386861313868, "grad_norm": 1.7023533464137646, "learning_rate": 2.8372782981874964e-06, "loss": 0.3343, "step": 6711 }, { "epoch": 0.6532360097323601, "grad_norm": 1.541253945375762, "learning_rate": 2.835857324130471e-06, "loss": 0.4786, "step": 6712 }, { "epoch": 0.6533333333333333, "grad_norm": 1.2110189102863618, "learning_rate": 2.8344365651256344e-06, "loss": 0.2647, "step": 6713 }, { "epoch": 0.6534306569343066, "grad_norm": 1.4043068918605717, "learning_rate": 2.8330160213141664e-06, "loss": 0.3855, "step": 6714 }, { "epoch": 0.6535279805352798, "grad_norm": 1.4107831158669495, "learning_rate": 2.831595692837229e-06, "loss": 0.4442, "step": 6715 }, { "epoch": 0.6536253041362531, "grad_norm": 1.4723933868153984, "learning_rate": 2.83017557983596e-06, "loss": 0.3573, "step": 6716 }, { "epoch": 0.6537226277372262, "grad_norm": 1.7510733037840591, "learning_rate": 2.8287556824514778e-06, "loss": 0.3143, "step": 6717 }, { "epoch": 0.6538199513381995, "grad_norm": 1.2229236152013945, "learning_rate": 2.8273360008248773e-06, "loss": 0.3149, "step": 6718 }, { "epoch": 0.6539172749391727, "grad_norm": 1.213354737561109, "learning_rate": 2.8259165350972367e-06, "loss": 0.2917, "step": 6719 }, { "epoch": 0.654014598540146, "grad_norm": 1.5777200613673565, "learning_rate": 2.8244972854096036e-06, "loss": 0.5767, "step": 6720 }, { "epoch": 0.6541119221411192, "grad_norm": 1.286511480595245, "learning_rate": 2.823078251903013e-06, "loss": 0.2742, "step": 6721 }, { "epoch": 0.6542092457420925, "grad_norm": 1.598419827748563, "learning_rate": 2.8216594347184754e-06, "loss": 0.446, "step": 6722 }, { "epoch": 0.6543065693430657, "grad_norm": 1.1662896886943277, "learning_rate": 2.8202408339969776e-06, "loss": 0.3565, "step": 6723 }, { "epoch": 0.6544038929440389, "grad_norm": 3.085003288922651, "learning_rate": 2.818822449879488e-06, "loss": 0.5741, "step": 6724 }, { "epoch": 0.6545012165450121, "grad_norm": 1.2821354016498248, "learning_rate": 2.8174042825069526e-06, "loss": 0.3194, "step": 6725 }, { "epoch": 0.6545985401459854, "grad_norm": 1.4086569638066282, "learning_rate": 2.815986332020294e-06, "loss": 0.3585, "step": 6726 }, { "epoch": 0.6546958637469587, "grad_norm": 1.2516048619486468, "learning_rate": 2.8145685985604164e-06, "loss": 0.3207, "step": 6727 }, { "epoch": 0.6547931873479319, "grad_norm": 1.6096589661966523, "learning_rate": 2.8131510822682005e-06, "loss": 0.3966, "step": 6728 }, { "epoch": 0.6548905109489052, "grad_norm": 1.4450744432855225, "learning_rate": 2.811733783284508e-06, "loss": 0.3273, "step": 6729 }, { "epoch": 0.6549878345498783, "grad_norm": 1.0838015290432028, "learning_rate": 2.8103167017501725e-06, "loss": 0.2482, "step": 6730 }, { "epoch": 0.6550851581508516, "grad_norm": 1.4705790230518594, "learning_rate": 2.8088998378060116e-06, "loss": 0.3232, "step": 6731 }, { "epoch": 0.6551824817518248, "grad_norm": 1.8547998125992238, "learning_rate": 2.8074831915928213e-06, "loss": 0.4581, "step": 6732 }, { "epoch": 0.6552798053527981, "grad_norm": 1.512042028276952, "learning_rate": 2.806066763251376e-06, "loss": 0.2257, "step": 6733 }, { "epoch": 0.6553771289537713, "grad_norm": 1.5493321025737354, "learning_rate": 2.804650552922422e-06, "loss": 0.4022, "step": 6734 }, { "epoch": 0.6554744525547446, "grad_norm": 1.3866403906805511, "learning_rate": 2.8032345607466927e-06, "loss": 0.3454, "step": 6735 }, { "epoch": 0.6555717761557177, "grad_norm": 1.5579323189741427, "learning_rate": 2.801818786864895e-06, "loss": 0.3515, "step": 6736 }, { "epoch": 0.655669099756691, "grad_norm": 1.497690568748641, "learning_rate": 2.8004032314177154e-06, "loss": 0.2639, "step": 6737 }, { "epoch": 0.6557664233576642, "grad_norm": 1.5483077378855565, "learning_rate": 2.7989878945458193e-06, "loss": 0.6582, "step": 6738 }, { "epoch": 0.6558637469586375, "grad_norm": 1.4877237392102052, "learning_rate": 2.7975727763898486e-06, "loss": 0.1536, "step": 6739 }, { "epoch": 0.6559610705596107, "grad_norm": 1.336065542766245, "learning_rate": 2.7961578770904263e-06, "loss": 0.3445, "step": 6740 }, { "epoch": 0.656058394160584, "grad_norm": 1.4297878876342072, "learning_rate": 2.794743196788149e-06, "loss": 0.377, "step": 6741 }, { "epoch": 0.6561557177615571, "grad_norm": 1.7819741099025646, "learning_rate": 2.7933287356235956e-06, "loss": 0.5225, "step": 6742 }, { "epoch": 0.6562530413625304, "grad_norm": 1.3255852144665683, "learning_rate": 2.791914493737322e-06, "loss": 0.3054, "step": 6743 }, { "epoch": 0.6563503649635036, "grad_norm": 1.969683108136502, "learning_rate": 2.7905004712698646e-06, "loss": 0.3802, "step": 6744 }, { "epoch": 0.6564476885644769, "grad_norm": 1.4518428601547329, "learning_rate": 2.7890866683617314e-06, "loss": 0.443, "step": 6745 }, { "epoch": 0.6565450121654501, "grad_norm": 1.7057052804764412, "learning_rate": 2.787673085153414e-06, "loss": 0.5134, "step": 6746 }, { "epoch": 0.6566423357664234, "grad_norm": 1.3307615252577463, "learning_rate": 2.7862597217853827e-06, "loss": 0.284, "step": 6747 }, { "epoch": 0.6567396593673966, "grad_norm": 1.5300810887159755, "learning_rate": 2.7848465783980837e-06, "loss": 0.2296, "step": 6748 }, { "epoch": 0.6568369829683698, "grad_norm": 1.585924270649872, "learning_rate": 2.783433655131941e-06, "loss": 0.4173, "step": 6749 }, { "epoch": 0.656934306569343, "grad_norm": 1.6458852091794416, "learning_rate": 2.782020952127359e-06, "loss": 0.6476, "step": 6750 }, { "epoch": 0.6570316301703163, "grad_norm": 1.2530117944587003, "learning_rate": 2.78060846952472e-06, "loss": 0.3907, "step": 6751 }, { "epoch": 0.6571289537712895, "grad_norm": 1.451593473861585, "learning_rate": 2.77919620746438e-06, "loss": 0.4913, "step": 6752 }, { "epoch": 0.6572262773722628, "grad_norm": 1.6784954010346753, "learning_rate": 2.7777841660866776e-06, "loss": 0.4332, "step": 6753 }, { "epoch": 0.657323600973236, "grad_norm": 1.7320311921859133, "learning_rate": 2.7763723455319284e-06, "loss": 0.4713, "step": 6754 }, { "epoch": 0.6574209245742092, "grad_norm": 1.784547063277984, "learning_rate": 2.774960745940428e-06, "loss": 0.3729, "step": 6755 }, { "epoch": 0.6575182481751825, "grad_norm": 1.289910055174179, "learning_rate": 2.7735493674524437e-06, "loss": 0.2325, "step": 6756 }, { "epoch": 0.6576155717761557, "grad_norm": 1.3923811378634814, "learning_rate": 2.772138210208228e-06, "loss": 0.4136, "step": 6757 }, { "epoch": 0.657712895377129, "grad_norm": 1.5675438914017519, "learning_rate": 2.7707272743480073e-06, "loss": 0.4787, "step": 6758 }, { "epoch": 0.6578102189781022, "grad_norm": 2.4454474366224277, "learning_rate": 2.7693165600119875e-06, "loss": 0.3791, "step": 6759 }, { "epoch": 0.6579075425790755, "grad_norm": 1.4853382398420436, "learning_rate": 2.7679060673403517e-06, "loss": 0.4117, "step": 6760 }, { "epoch": 0.6580048661800486, "grad_norm": 1.6557659665466375, "learning_rate": 2.7664957964732624e-06, "loss": 0.5487, "step": 6761 }, { "epoch": 0.6581021897810219, "grad_norm": 1.512419230298557, "learning_rate": 2.7650857475508608e-06, "loss": 0.4162, "step": 6762 }, { "epoch": 0.6581995133819951, "grad_norm": 1.408478729748277, "learning_rate": 2.76367592071326e-06, "loss": 0.2971, "step": 6763 }, { "epoch": 0.6582968369829684, "grad_norm": 1.3898916132267092, "learning_rate": 2.7622663161005576e-06, "loss": 0.2689, "step": 6764 }, { "epoch": 0.6583941605839416, "grad_norm": 1.8935341639633638, "learning_rate": 2.7608569338528284e-06, "loss": 0.6053, "step": 6765 }, { "epoch": 0.6584914841849149, "grad_norm": 1.4152439303474345, "learning_rate": 2.75944777411012e-06, "loss": 0.2361, "step": 6766 }, { "epoch": 0.6585888077858881, "grad_norm": 1.3655151602351514, "learning_rate": 2.7580388370124644e-06, "loss": 0.3103, "step": 6767 }, { "epoch": 0.6586861313868613, "grad_norm": 1.4381378251506454, "learning_rate": 2.7566301226998667e-06, "loss": 0.4752, "step": 6768 }, { "epoch": 0.6587834549878345, "grad_norm": 1.4347834419039491, "learning_rate": 2.7552216313123126e-06, "loss": 0.3787, "step": 6769 }, { "epoch": 0.6588807785888078, "grad_norm": 1.485348512745472, "learning_rate": 2.753813362989765e-06, "loss": 0.531, "step": 6770 }, { "epoch": 0.658978102189781, "grad_norm": 1.5954596381561879, "learning_rate": 2.7524053178721642e-06, "loss": 0.3961, "step": 6771 }, { "epoch": 0.6590754257907543, "grad_norm": 1.6653211248442563, "learning_rate": 2.750997496099428e-06, "loss": 0.4432, "step": 6772 }, { "epoch": 0.6591727493917275, "grad_norm": 1.9895189551145296, "learning_rate": 2.7495898978114554e-06, "loss": 0.3427, "step": 6773 }, { "epoch": 0.6592700729927007, "grad_norm": 1.395584392648721, "learning_rate": 2.7481825231481156e-06, "loss": 0.2998, "step": 6774 }, { "epoch": 0.6593673965936739, "grad_norm": 1.4257930389334967, "learning_rate": 2.746775372249263e-06, "loss": 0.2927, "step": 6775 }, { "epoch": 0.6594647201946472, "grad_norm": 1.2339528484610085, "learning_rate": 2.745368445254728e-06, "loss": 0.2964, "step": 6776 }, { "epoch": 0.6595620437956204, "grad_norm": 1.792398199917605, "learning_rate": 2.7439617423043146e-06, "loss": 0.5683, "step": 6777 }, { "epoch": 0.6596593673965937, "grad_norm": 1.2875917309424718, "learning_rate": 2.7425552635378094e-06, "loss": 0.3235, "step": 6778 }, { "epoch": 0.659756690997567, "grad_norm": 1.3979402222711956, "learning_rate": 2.7411490090949754e-06, "loss": 0.3291, "step": 6779 }, { "epoch": 0.6598540145985401, "grad_norm": 1.203183259420406, "learning_rate": 2.7397429791155526e-06, "loss": 0.2845, "step": 6780 }, { "epoch": 0.6599513381995133, "grad_norm": 1.535820193773522, "learning_rate": 2.73833717373926e-06, "loss": 0.6247, "step": 6781 }, { "epoch": 0.6600486618004866, "grad_norm": 1.3880646527538296, "learning_rate": 2.7369315931057916e-06, "loss": 0.3147, "step": 6782 }, { "epoch": 0.6601459854014599, "grad_norm": 1.317367338707539, "learning_rate": 2.7355262373548243e-06, "loss": 0.3695, "step": 6783 }, { "epoch": 0.6602433090024331, "grad_norm": 1.8179251934058178, "learning_rate": 2.7341211066260047e-06, "loss": 0.3467, "step": 6784 }, { "epoch": 0.6603406326034064, "grad_norm": 1.4156028830040523, "learning_rate": 2.7327162010589636e-06, "loss": 0.395, "step": 6785 }, { "epoch": 0.6604379562043796, "grad_norm": 1.5206793441500446, "learning_rate": 2.7313115207933068e-06, "loss": 0.4647, "step": 6786 }, { "epoch": 0.6605352798053528, "grad_norm": 1.4097349430422725, "learning_rate": 2.7299070659686207e-06, "loss": 0.3793, "step": 6787 }, { "epoch": 0.660632603406326, "grad_norm": 1.3418131331745644, "learning_rate": 2.7285028367244625e-06, "loss": 0.2305, "step": 6788 }, { "epoch": 0.6607299270072993, "grad_norm": 1.3267575573257613, "learning_rate": 2.727098833200374e-06, "loss": 0.3616, "step": 6789 }, { "epoch": 0.6608272506082725, "grad_norm": 1.1981549343932583, "learning_rate": 2.725695055535871e-06, "loss": 0.2421, "step": 6790 }, { "epoch": 0.6609245742092458, "grad_norm": 1.4727479617756, "learning_rate": 2.724291503870449e-06, "loss": 0.5035, "step": 6791 }, { "epoch": 0.661021897810219, "grad_norm": 1.4243252779763125, "learning_rate": 2.7228881783435785e-06, "loss": 0.5362, "step": 6792 }, { "epoch": 0.6611192214111922, "grad_norm": 1.2399776162926734, "learning_rate": 2.7214850790947088e-06, "loss": 0.2938, "step": 6793 }, { "epoch": 0.6612165450121654, "grad_norm": 1.6318296549942648, "learning_rate": 2.72008220626327e-06, "loss": 0.1549, "step": 6794 }, { "epoch": 0.6613138686131387, "grad_norm": 1.3501742956886726, "learning_rate": 2.718679559988662e-06, "loss": 0.3351, "step": 6795 }, { "epoch": 0.6614111922141119, "grad_norm": 1.330888901571354, "learning_rate": 2.7172771404102683e-06, "loss": 0.3373, "step": 6796 }, { "epoch": 0.6615085158150852, "grad_norm": 3.9666140357834805, "learning_rate": 2.715874947667447e-06, "loss": 0.316, "step": 6797 }, { "epoch": 0.6616058394160584, "grad_norm": 1.5280511881158292, "learning_rate": 2.71447298189954e-06, "loss": 0.2387, "step": 6798 }, { "epoch": 0.6617031630170316, "grad_norm": 1.242125983484383, "learning_rate": 2.7130712432458537e-06, "loss": 0.1557, "step": 6799 }, { "epoch": 0.6618004866180048, "grad_norm": 1.8552117420688552, "learning_rate": 2.7116697318456847e-06, "loss": 0.2278, "step": 6800 }, { "epoch": 0.6618978102189781, "grad_norm": 1.4872710236053985, "learning_rate": 2.7102684478383006e-06, "loss": 0.373, "step": 6801 }, { "epoch": 0.6619951338199513, "grad_norm": 1.6932412428607622, "learning_rate": 2.708867391362948e-06, "loss": 0.4548, "step": 6802 }, { "epoch": 0.6620924574209246, "grad_norm": 1.4356740051860206, "learning_rate": 2.7074665625588515e-06, "loss": 0.2871, "step": 6803 }, { "epoch": 0.6621897810218978, "grad_norm": 1.652451531973401, "learning_rate": 2.706065961565212e-06, "loss": 0.3971, "step": 6804 }, { "epoch": 0.662287104622871, "grad_norm": 1.486040887134083, "learning_rate": 2.7046655885212093e-06, "loss": 0.2709, "step": 6805 }, { "epoch": 0.6623844282238442, "grad_norm": 1.4917475666009035, "learning_rate": 2.703265443565996e-06, "loss": 0.4575, "step": 6806 }, { "epoch": 0.6624817518248175, "grad_norm": 1.4235482065006015, "learning_rate": 2.7018655268387075e-06, "loss": 0.3679, "step": 6807 }, { "epoch": 0.6625790754257908, "grad_norm": 1.2560006591679733, "learning_rate": 2.700465838478454e-06, "loss": 0.2515, "step": 6808 }, { "epoch": 0.662676399026764, "grad_norm": 1.497765784519021, "learning_rate": 2.6990663786243255e-06, "loss": 0.6173, "step": 6809 }, { "epoch": 0.6627737226277373, "grad_norm": 1.3066855110703985, "learning_rate": 2.697667147415383e-06, "loss": 0.2614, "step": 6810 }, { "epoch": 0.6628710462287105, "grad_norm": 1.5807901671285265, "learning_rate": 2.696268144990669e-06, "loss": 0.2332, "step": 6811 }, { "epoch": 0.6629683698296837, "grad_norm": 1.4737498626838341, "learning_rate": 2.6948693714892104e-06, "loss": 0.345, "step": 6812 }, { "epoch": 0.6630656934306569, "grad_norm": 1.656285051658688, "learning_rate": 2.6934708270499964e-06, "loss": 0.3504, "step": 6813 }, { "epoch": 0.6631630170316302, "grad_norm": 1.76532033806788, "learning_rate": 2.692072511812004e-06, "loss": 0.5332, "step": 6814 }, { "epoch": 0.6632603406326034, "grad_norm": 1.1082327966815568, "learning_rate": 2.6906744259141847e-06, "loss": 0.2255, "step": 6815 }, { "epoch": 0.6633576642335767, "grad_norm": 1.5337460473840547, "learning_rate": 2.6892765694954696e-06, "loss": 0.4916, "step": 6816 }, { "epoch": 0.6634549878345499, "grad_norm": 1.352395434964838, "learning_rate": 2.68787894269476e-06, "loss": 0.2645, "step": 6817 }, { "epoch": 0.6635523114355231, "grad_norm": 1.4003880712254435, "learning_rate": 2.686481545650941e-06, "loss": 0.3401, "step": 6818 }, { "epoch": 0.6636496350364963, "grad_norm": 1.6332773523403699, "learning_rate": 2.6850843785028748e-06, "loss": 0.4248, "step": 6819 }, { "epoch": 0.6637469586374696, "grad_norm": 1.6862710787146364, "learning_rate": 2.6836874413893945e-06, "loss": 0.5409, "step": 6820 }, { "epoch": 0.6638442822384428, "grad_norm": 1.2001202205200827, "learning_rate": 2.6822907344493143e-06, "loss": 0.1653, "step": 6821 }, { "epoch": 0.6639416058394161, "grad_norm": 1.723918979652751, "learning_rate": 2.6808942578214312e-06, "loss": 0.4331, "step": 6822 }, { "epoch": 0.6640389294403893, "grad_norm": 1.5146121544000053, "learning_rate": 2.6794980116445133e-06, "loss": 0.4546, "step": 6823 }, { "epoch": 0.6641362530413625, "grad_norm": 1.302752950784683, "learning_rate": 2.6781019960573016e-06, "loss": 0.2941, "step": 6824 }, { "epoch": 0.6642335766423357, "grad_norm": 1.724459372679229, "learning_rate": 2.676706211198522e-06, "loss": 0.3436, "step": 6825 }, { "epoch": 0.664330900243309, "grad_norm": 1.3415579548422654, "learning_rate": 2.6753106572068743e-06, "loss": 0.4702, "step": 6826 }, { "epoch": 0.6644282238442822, "grad_norm": 1.3351960829701917, "learning_rate": 2.6739153342210378e-06, "loss": 0.3489, "step": 6827 }, { "epoch": 0.6645255474452555, "grad_norm": 1.3789849191329269, "learning_rate": 2.6725202423796615e-06, "loss": 0.3317, "step": 6828 }, { "epoch": 0.6646228710462287, "grad_norm": 1.7664426412634113, "learning_rate": 2.67112538182138e-06, "loss": 0.543, "step": 6829 }, { "epoch": 0.664720194647202, "grad_norm": 1.4864940575751309, "learning_rate": 2.6697307526848026e-06, "loss": 0.4393, "step": 6830 }, { "epoch": 0.6648175182481751, "grad_norm": 1.5739230879298867, "learning_rate": 2.6683363551085085e-06, "loss": 0.3464, "step": 6831 }, { "epoch": 0.6649148418491484, "grad_norm": 1.2218897552770067, "learning_rate": 2.6669421892310654e-06, "loss": 0.2979, "step": 6832 }, { "epoch": 0.6650121654501217, "grad_norm": 1.4774874257748367, "learning_rate": 2.665548255191012e-06, "loss": 0.3952, "step": 6833 }, { "epoch": 0.6651094890510949, "grad_norm": 1.4018603158386684, "learning_rate": 2.6641545531268644e-06, "loss": 0.535, "step": 6834 }, { "epoch": 0.6652068126520682, "grad_norm": 1.1243934055622178, "learning_rate": 2.6627610831771134e-06, "loss": 0.2501, "step": 6835 }, { "epoch": 0.6653041362530414, "grad_norm": 1.512693499963609, "learning_rate": 2.6613678454802293e-06, "loss": 0.206, "step": 6836 }, { "epoch": 0.6654014598540146, "grad_norm": 1.435277796848393, "learning_rate": 2.6599748401746605e-06, "loss": 0.2615, "step": 6837 }, { "epoch": 0.6654987834549878, "grad_norm": 1.6561965213994767, "learning_rate": 2.6585820673988315e-06, "loss": 0.4856, "step": 6838 }, { "epoch": 0.6655961070559611, "grad_norm": 1.4960382117577842, "learning_rate": 2.65718952729114e-06, "loss": 0.3522, "step": 6839 }, { "epoch": 0.6656934306569343, "grad_norm": 1.4027075335516728, "learning_rate": 2.655797219989965e-06, "loss": 0.3226, "step": 6840 }, { "epoch": 0.6657907542579076, "grad_norm": 1.2693321189230098, "learning_rate": 2.654405145633664e-06, "loss": 0.497, "step": 6841 }, { "epoch": 0.6658880778588808, "grad_norm": 1.6336499854414535, "learning_rate": 2.6530133043605606e-06, "loss": 0.4969, "step": 6842 }, { "epoch": 0.665985401459854, "grad_norm": 1.7307854700549319, "learning_rate": 2.6516216963089698e-06, "loss": 0.5778, "step": 6843 }, { "epoch": 0.6660827250608272, "grad_norm": 1.2561468622941703, "learning_rate": 2.6502303216171743e-06, "loss": 0.3605, "step": 6844 }, { "epoch": 0.6661800486618005, "grad_norm": 1.3090419627982983, "learning_rate": 2.6488391804234383e-06, "loss": 0.368, "step": 6845 }, { "epoch": 0.6662773722627737, "grad_norm": 1.3356484440632348, "learning_rate": 2.6474482728659955e-06, "loss": 0.3893, "step": 6846 }, { "epoch": 0.666374695863747, "grad_norm": 1.4774243297014897, "learning_rate": 2.646057599083065e-06, "loss": 0.491, "step": 6847 }, { "epoch": 0.6664720194647202, "grad_norm": 1.4784459610404423, "learning_rate": 2.6446671592128385e-06, "loss": 0.2695, "step": 6848 }, { "epoch": 0.6665693430656934, "grad_norm": 1.723875760255084, "learning_rate": 2.643276953393483e-06, "loss": 0.5801, "step": 6849 }, { "epoch": 0.6666666666666666, "grad_norm": 1.4119887346623394, "learning_rate": 2.6418869817631442e-06, "loss": 0.2102, "step": 6850 }, { "epoch": 0.6667639902676399, "grad_norm": 1.8746959439745785, "learning_rate": 2.6404972444599462e-06, "loss": 0.482, "step": 6851 }, { "epoch": 0.6668613138686131, "grad_norm": 1.4863633340883518, "learning_rate": 2.639107741621987e-06, "loss": 0.2951, "step": 6852 }, { "epoch": 0.6669586374695864, "grad_norm": 1.3542596002625418, "learning_rate": 2.637718473387343e-06, "loss": 0.3737, "step": 6853 }, { "epoch": 0.6670559610705596, "grad_norm": 1.4410889645922729, "learning_rate": 2.6363294398940664e-06, "loss": 0.3663, "step": 6854 }, { "epoch": 0.6671532846715329, "grad_norm": 1.6213355929590048, "learning_rate": 2.6349406412801857e-06, "loss": 0.5067, "step": 6855 }, { "epoch": 0.667250608272506, "grad_norm": 1.259669147907595, "learning_rate": 2.633552077683709e-06, "loss": 0.2239, "step": 6856 }, { "epoch": 0.6673479318734793, "grad_norm": 1.6313138956585955, "learning_rate": 2.6321637492426157e-06, "loss": 0.327, "step": 6857 }, { "epoch": 0.6674452554744525, "grad_norm": 1.5718047230597259, "learning_rate": 2.630775656094865e-06, "loss": 0.4699, "step": 6858 }, { "epoch": 0.6675425790754258, "grad_norm": 1.6635215488938018, "learning_rate": 2.6293877983783965e-06, "loss": 0.4299, "step": 6859 }, { "epoch": 0.667639902676399, "grad_norm": 1.725007751005146, "learning_rate": 2.628000176231117e-06, "loss": 0.4024, "step": 6860 }, { "epoch": 0.6677372262773723, "grad_norm": 1.5256270104582157, "learning_rate": 2.6266127897909175e-06, "loss": 0.2548, "step": 6861 }, { "epoch": 0.6678345498783455, "grad_norm": 1.4672932807075405, "learning_rate": 2.625225639195665e-06, "loss": 0.2651, "step": 6862 }, { "epoch": 0.6679318734793187, "grad_norm": 1.3575922797295445, "learning_rate": 2.6238387245831996e-06, "loss": 0.3917, "step": 6863 }, { "epoch": 0.668029197080292, "grad_norm": 1.456159315437296, "learning_rate": 2.6224520460913413e-06, "loss": 0.3906, "step": 6864 }, { "epoch": 0.6681265206812652, "grad_norm": 1.703706554550206, "learning_rate": 2.621065603857884e-06, "loss": 0.5739, "step": 6865 }, { "epoch": 0.6682238442822385, "grad_norm": 1.3691555136661493, "learning_rate": 2.619679398020602e-06, "loss": 0.4599, "step": 6866 }, { "epoch": 0.6683211678832117, "grad_norm": 1.17929550630756, "learning_rate": 2.618293428717239e-06, "loss": 0.2635, "step": 6867 }, { "epoch": 0.6684184914841849, "grad_norm": 1.834881223009904, "learning_rate": 2.6169076960855222e-06, "loss": 0.5044, "step": 6868 }, { "epoch": 0.6685158150851581, "grad_norm": 1.5147877376405583, "learning_rate": 2.6155222002631526e-06, "loss": 0.4497, "step": 6869 }, { "epoch": 0.6686131386861314, "grad_norm": 1.6463491219098636, "learning_rate": 2.614136941387809e-06, "loss": 0.3537, "step": 6870 }, { "epoch": 0.6687104622871046, "grad_norm": 1.4510859467851225, "learning_rate": 2.612751919597143e-06, "loss": 0.4066, "step": 6871 }, { "epoch": 0.6688077858880779, "grad_norm": 1.6114800029038248, "learning_rate": 2.611367135028785e-06, "loss": 0.3816, "step": 6872 }, { "epoch": 0.6689051094890511, "grad_norm": 1.6799964798027578, "learning_rate": 2.6099825878203434e-06, "loss": 0.4222, "step": 6873 }, { "epoch": 0.6690024330900244, "grad_norm": 1.4065643806843282, "learning_rate": 2.608598278109401e-06, "loss": 0.3696, "step": 6874 }, { "epoch": 0.6690997566909975, "grad_norm": 1.5204570862253417, "learning_rate": 2.607214206033518e-06, "loss": 0.4105, "step": 6875 }, { "epoch": 0.6691970802919708, "grad_norm": 1.6410568395577223, "learning_rate": 2.605830371730229e-06, "loss": 0.3413, "step": 6876 }, { "epoch": 0.669294403892944, "grad_norm": 1.6532507316787326, "learning_rate": 2.6044467753370505e-06, "loss": 0.2001, "step": 6877 }, { "epoch": 0.6693917274939173, "grad_norm": 1.5092478033141117, "learning_rate": 2.603063416991466e-06, "loss": 0.2827, "step": 6878 }, { "epoch": 0.6694890510948905, "grad_norm": 1.546751306385594, "learning_rate": 2.601680296830943e-06, "loss": 0.3002, "step": 6879 }, { "epoch": 0.6695863746958638, "grad_norm": 1.6644752241349323, "learning_rate": 2.6002974149929234e-06, "loss": 0.5049, "step": 6880 }, { "epoch": 0.6696836982968369, "grad_norm": 1.475128123004427, "learning_rate": 2.5989147716148266e-06, "loss": 0.3427, "step": 6881 }, { "epoch": 0.6697810218978102, "grad_norm": 1.5064062963413896, "learning_rate": 2.5975323668340424e-06, "loss": 0.512, "step": 6882 }, { "epoch": 0.6698783454987834, "grad_norm": 1.435389918874925, "learning_rate": 2.5961502007879435e-06, "loss": 0.2699, "step": 6883 }, { "epoch": 0.6699756690997567, "grad_norm": 1.455527995158438, "learning_rate": 2.5947682736138767e-06, "loss": 0.5053, "step": 6884 }, { "epoch": 0.67007299270073, "grad_norm": 1.3756279811800831, "learning_rate": 2.593386585449166e-06, "loss": 0.4458, "step": 6885 }, { "epoch": 0.6701703163017032, "grad_norm": 1.4437589801987945, "learning_rate": 2.5920051364311083e-06, "loss": 0.2007, "step": 6886 }, { "epoch": 0.6702676399026763, "grad_norm": 1.2319789761693942, "learning_rate": 2.5906239266969806e-06, "loss": 0.2167, "step": 6887 }, { "epoch": 0.6703649635036496, "grad_norm": 1.500079947566321, "learning_rate": 2.589242956384036e-06, "loss": 0.4666, "step": 6888 }, { "epoch": 0.6704622871046229, "grad_norm": 1.3702725725435745, "learning_rate": 2.5878622256294995e-06, "loss": 0.3863, "step": 6889 }, { "epoch": 0.6705596107055961, "grad_norm": 1.3110456583465535, "learning_rate": 2.586481734570575e-06, "loss": 0.2975, "step": 6890 }, { "epoch": 0.6706569343065694, "grad_norm": 1.2784160814190484, "learning_rate": 2.5851014833444447e-06, "loss": 0.2023, "step": 6891 }, { "epoch": 0.6707542579075426, "grad_norm": 1.2698345115553682, "learning_rate": 2.5837214720882662e-06, "loss": 0.2125, "step": 6892 }, { "epoch": 0.6708515815085158, "grad_norm": 1.5916352546881036, "learning_rate": 2.5823417009391684e-06, "loss": 0.4794, "step": 6893 }, { "epoch": 0.670948905109489, "grad_norm": 1.3308019251133874, "learning_rate": 2.5809621700342614e-06, "loss": 0.2422, "step": 6894 }, { "epoch": 0.6710462287104623, "grad_norm": 1.6001626945271217, "learning_rate": 2.5795828795106305e-06, "loss": 0.6117, "step": 6895 }, { "epoch": 0.6711435523114355, "grad_norm": 1.5937722896438413, "learning_rate": 2.578203829505337e-06, "loss": 0.2808, "step": 6896 }, { "epoch": 0.6712408759124088, "grad_norm": 1.6413766501516232, "learning_rate": 2.5768250201554167e-06, "loss": 0.5904, "step": 6897 }, { "epoch": 0.671338199513382, "grad_norm": 1.286238472667233, "learning_rate": 2.5754464515978845e-06, "loss": 0.1833, "step": 6898 }, { "epoch": 0.6714355231143553, "grad_norm": 1.7388750412253844, "learning_rate": 2.57406812396973e-06, "loss": 0.4257, "step": 6899 }, { "epoch": 0.6715328467153284, "grad_norm": 1.4820959162542136, "learning_rate": 2.5726900374079155e-06, "loss": 0.2638, "step": 6900 }, { "epoch": 0.6716301703163017, "grad_norm": 1.3416956345568392, "learning_rate": 2.5713121920493833e-06, "loss": 0.3294, "step": 6901 }, { "epoch": 0.6717274939172749, "grad_norm": 1.2547849624423444, "learning_rate": 2.5699345880310546e-06, "loss": 0.3069, "step": 6902 }, { "epoch": 0.6718248175182482, "grad_norm": 1.4256756469078642, "learning_rate": 2.5685572254898163e-06, "loss": 0.4298, "step": 6903 }, { "epoch": 0.6719221411192214, "grad_norm": 1.553886168720797, "learning_rate": 2.5671801045625413e-06, "loss": 0.1912, "step": 6904 }, { "epoch": 0.6720194647201947, "grad_norm": 1.4413122958031002, "learning_rate": 2.565803225386075e-06, "loss": 0.3262, "step": 6905 }, { "epoch": 0.6721167883211678, "grad_norm": 1.6352013448228178, "learning_rate": 2.564426588097238e-06, "loss": 0.3957, "step": 6906 }, { "epoch": 0.6722141119221411, "grad_norm": 1.4056108505737754, "learning_rate": 2.5630501928328276e-06, "loss": 0.3701, "step": 6907 }, { "epoch": 0.6723114355231143, "grad_norm": 1.3570337852230339, "learning_rate": 2.5616740397296184e-06, "loss": 0.2851, "step": 6908 }, { "epoch": 0.6724087591240876, "grad_norm": 1.7010645170092262, "learning_rate": 2.560298128924358e-06, "loss": 0.6163, "step": 6909 }, { "epoch": 0.6725060827250608, "grad_norm": 1.697343163864623, "learning_rate": 2.5589224605537744e-06, "loss": 0.3209, "step": 6910 }, { "epoch": 0.6726034063260341, "grad_norm": 1.2657390590182054, "learning_rate": 2.557547034754564e-06, "loss": 0.3305, "step": 6911 }, { "epoch": 0.6727007299270072, "grad_norm": 1.7582771195861744, "learning_rate": 2.5561718516634058e-06, "loss": 0.4296, "step": 6912 }, { "epoch": 0.6727980535279805, "grad_norm": 1.3308413384247961, "learning_rate": 2.5547969114169554e-06, "loss": 0.3097, "step": 6913 }, { "epoch": 0.6728953771289538, "grad_norm": 1.4967437208740688, "learning_rate": 2.553422214151836e-06, "loss": 0.398, "step": 6914 }, { "epoch": 0.672992700729927, "grad_norm": 1.7208607284298814, "learning_rate": 2.5520477600046556e-06, "loss": 0.224, "step": 6915 }, { "epoch": 0.6730900243309003, "grad_norm": 1.6043783601399488, "learning_rate": 2.550673549111994e-06, "loss": 0.3508, "step": 6916 }, { "epoch": 0.6731873479318735, "grad_norm": 1.5019913806185416, "learning_rate": 2.549299581610407e-06, "loss": 0.2414, "step": 6917 }, { "epoch": 0.6732846715328468, "grad_norm": 1.3128864082829395, "learning_rate": 2.5479258576364274e-06, "loss": 0.4193, "step": 6918 }, { "epoch": 0.6733819951338199, "grad_norm": 1.4228482112177416, "learning_rate": 2.546552377326562e-06, "loss": 0.3994, "step": 6919 }, { "epoch": 0.6734793187347932, "grad_norm": 1.5312201049377667, "learning_rate": 2.545179140817297e-06, "loss": 0.5103, "step": 6920 }, { "epoch": 0.6735766423357664, "grad_norm": 1.5038578174273096, "learning_rate": 2.5438061482450877e-06, "loss": 0.5274, "step": 6921 }, { "epoch": 0.6736739659367397, "grad_norm": 1.5488599891800303, "learning_rate": 2.5424333997463713e-06, "loss": 0.4173, "step": 6922 }, { "epoch": 0.6737712895377129, "grad_norm": 1.4837450799000769, "learning_rate": 2.5410608954575577e-06, "loss": 0.4064, "step": 6923 }, { "epoch": 0.6738686131386862, "grad_norm": 1.5839213065916038, "learning_rate": 2.5396886355150375e-06, "loss": 0.4168, "step": 6924 }, { "epoch": 0.6739659367396593, "grad_norm": 1.3837122172337968, "learning_rate": 2.538316620055167e-06, "loss": 0.3702, "step": 6925 }, { "epoch": 0.6740632603406326, "grad_norm": 1.4007117675977598, "learning_rate": 2.536944849214287e-06, "loss": 0.2946, "step": 6926 }, { "epoch": 0.6741605839416058, "grad_norm": 1.3089715213081934, "learning_rate": 2.5355733231287115e-06, "loss": 0.2765, "step": 6927 }, { "epoch": 0.6742579075425791, "grad_norm": 1.5761921442895896, "learning_rate": 2.5342020419347296e-06, "loss": 0.3464, "step": 6928 }, { "epoch": 0.6743552311435523, "grad_norm": 1.4211112685589127, "learning_rate": 2.532831005768607e-06, "loss": 0.4043, "step": 6929 }, { "epoch": 0.6744525547445256, "grad_norm": 1.1801705287300368, "learning_rate": 2.5314602147665823e-06, "loss": 0.2711, "step": 6930 }, { "epoch": 0.6745498783454987, "grad_norm": 2.3498059172330232, "learning_rate": 2.530089669064877e-06, "loss": 0.3169, "step": 6931 }, { "epoch": 0.674647201946472, "grad_norm": 1.3312598089295549, "learning_rate": 2.5287193687996757e-06, "loss": 0.4139, "step": 6932 }, { "epoch": 0.6747445255474452, "grad_norm": 1.3254325952245336, "learning_rate": 2.5273493141071517e-06, "loss": 0.1808, "step": 6933 }, { "epoch": 0.6748418491484185, "grad_norm": 1.7734026800425997, "learning_rate": 2.525979505123445e-06, "loss": 0.3839, "step": 6934 }, { "epoch": 0.6749391727493917, "grad_norm": 1.4758122066807204, "learning_rate": 2.524609941984677e-06, "loss": 0.4302, "step": 6935 }, { "epoch": 0.675036496350365, "grad_norm": 1.7813840447567562, "learning_rate": 2.523240624826939e-06, "loss": 0.3517, "step": 6936 }, { "epoch": 0.6751338199513383, "grad_norm": 1.3637936566407973, "learning_rate": 2.521871553786303e-06, "loss": 0.4106, "step": 6937 }, { "epoch": 0.6752311435523114, "grad_norm": 1.4165533057839517, "learning_rate": 2.5205027289988136e-06, "loss": 0.3022, "step": 6938 }, { "epoch": 0.6753284671532847, "grad_norm": 1.6688244520859234, "learning_rate": 2.519134150600492e-06, "loss": 0.1797, "step": 6939 }, { "epoch": 0.6754257907542579, "grad_norm": 1.8398284587508136, "learning_rate": 2.5177658187273346e-06, "loss": 0.4476, "step": 6940 }, { "epoch": 0.6755231143552312, "grad_norm": 1.4645344725589344, "learning_rate": 2.5163977335153136e-06, "loss": 0.2885, "step": 6941 }, { "epoch": 0.6756204379562044, "grad_norm": 1.6161709599002068, "learning_rate": 2.5150298951003783e-06, "loss": 0.2876, "step": 6942 }, { "epoch": 0.6757177615571777, "grad_norm": 1.4815866079825117, "learning_rate": 2.5136623036184483e-06, "loss": 0.4605, "step": 6943 }, { "epoch": 0.6758150851581508, "grad_norm": 1.609615196895527, "learning_rate": 2.5122949592054225e-06, "loss": 0.4088, "step": 6944 }, { "epoch": 0.6759124087591241, "grad_norm": 1.383240541923229, "learning_rate": 2.510927861997176e-06, "loss": 0.3329, "step": 6945 }, { "epoch": 0.6760097323600973, "grad_norm": 1.3351640749055282, "learning_rate": 2.50956101212956e-06, "loss": 0.3231, "step": 6946 }, { "epoch": 0.6761070559610706, "grad_norm": 1.4935226458942055, "learning_rate": 2.508194409738395e-06, "loss": 0.4479, "step": 6947 }, { "epoch": 0.6762043795620438, "grad_norm": 1.9088923791770325, "learning_rate": 2.5068280549594827e-06, "loss": 0.2989, "step": 6948 }, { "epoch": 0.6763017031630171, "grad_norm": 1.893732276052998, "learning_rate": 2.505461947928599e-06, "loss": 0.4451, "step": 6949 }, { "epoch": 0.6763990267639902, "grad_norm": 1.5889441642097806, "learning_rate": 2.5040960887814947e-06, "loss": 0.4973, "step": 6950 }, { "epoch": 0.6764963503649635, "grad_norm": 1.2679244451485847, "learning_rate": 2.5027304776538964e-06, "loss": 0.2481, "step": 6951 }, { "epoch": 0.6765936739659367, "grad_norm": 1.7820610742945566, "learning_rate": 2.5013651146815055e-06, "loss": 0.3566, "step": 6952 }, { "epoch": 0.67669099756691, "grad_norm": 1.446927709422681, "learning_rate": 2.5000000000000015e-06, "loss": 0.2403, "step": 6953 }, { "epoch": 0.6767883211678832, "grad_norm": 1.4698336235923033, "learning_rate": 2.4986351337450315e-06, "loss": 0.2056, "step": 6954 }, { "epoch": 0.6768856447688565, "grad_norm": 1.4352353542997942, "learning_rate": 2.4972705160522255e-06, "loss": 0.2377, "step": 6955 }, { "epoch": 0.6769829683698296, "grad_norm": 1.5869182692251935, "learning_rate": 2.495906147057187e-06, "loss": 0.5277, "step": 6956 }, { "epoch": 0.6770802919708029, "grad_norm": 1.6685424818530001, "learning_rate": 2.4945420268954957e-06, "loss": 0.3761, "step": 6957 }, { "epoch": 0.6771776155717761, "grad_norm": 1.744336705650269, "learning_rate": 2.4931781557027013e-06, "loss": 0.4476, "step": 6958 }, { "epoch": 0.6772749391727494, "grad_norm": 1.562944806682162, "learning_rate": 2.491814533614334e-06, "loss": 0.4595, "step": 6959 }, { "epoch": 0.6773722627737226, "grad_norm": 1.3864422589675964, "learning_rate": 2.4904511607658986e-06, "loss": 0.3829, "step": 6960 }, { "epoch": 0.6774695863746959, "grad_norm": 1.4034962288344124, "learning_rate": 2.4890880372928736e-06, "loss": 0.3745, "step": 6961 }, { "epoch": 0.6775669099756692, "grad_norm": 1.3183694118541551, "learning_rate": 2.4877251633307137e-06, "loss": 0.3473, "step": 6962 }, { "epoch": 0.6776642335766423, "grad_norm": 1.094957938061393, "learning_rate": 2.4863625390148487e-06, "loss": 0.2575, "step": 6963 }, { "epoch": 0.6777615571776155, "grad_norm": 1.534354041552699, "learning_rate": 2.485000164480685e-06, "loss": 0.331, "step": 6964 }, { "epoch": 0.6778588807785888, "grad_norm": 1.4041273415300692, "learning_rate": 2.4836380398635982e-06, "loss": 0.2992, "step": 6965 }, { "epoch": 0.677956204379562, "grad_norm": 1.2802825624793999, "learning_rate": 2.482276165298947e-06, "loss": 0.3063, "step": 6966 }, { "epoch": 0.6780535279805353, "grad_norm": 1.4204287037666237, "learning_rate": 2.4809145409220623e-06, "loss": 0.4712, "step": 6967 }, { "epoch": 0.6781508515815086, "grad_norm": 1.3437498669291585, "learning_rate": 2.4795531668682466e-06, "loss": 0.2643, "step": 6968 }, { "epoch": 0.6782481751824817, "grad_norm": 1.6285738891807648, "learning_rate": 2.4781920432727813e-06, "loss": 0.5092, "step": 6969 }, { "epoch": 0.678345498783455, "grad_norm": 1.6169111964859966, "learning_rate": 2.476831170270921e-06, "loss": 0.5218, "step": 6970 }, { "epoch": 0.6784428223844282, "grad_norm": 1.2529390116201187, "learning_rate": 2.475470547997902e-06, "loss": 0.298, "step": 6971 }, { "epoch": 0.6785401459854015, "grad_norm": 1.6428611411762748, "learning_rate": 2.474110176588924e-06, "loss": 0.5253, "step": 6972 }, { "epoch": 0.6786374695863747, "grad_norm": 1.3779755820587174, "learning_rate": 2.4727500561791707e-06, "loss": 0.3648, "step": 6973 }, { "epoch": 0.678734793187348, "grad_norm": 1.4918542459847803, "learning_rate": 2.4713901869037976e-06, "loss": 0.2401, "step": 6974 }, { "epoch": 0.6788321167883211, "grad_norm": 1.5435250740053599, "learning_rate": 2.470030568897938e-06, "loss": 0.4601, "step": 6975 }, { "epoch": 0.6789294403892944, "grad_norm": 1.5073842443557104, "learning_rate": 2.4686712022966937e-06, "loss": 0.2173, "step": 6976 }, { "epoch": 0.6790267639902676, "grad_norm": 1.4464958501572325, "learning_rate": 2.467312087235148e-06, "loss": 0.2882, "step": 6977 }, { "epoch": 0.6791240875912409, "grad_norm": 1.371599500795431, "learning_rate": 2.4659532238483586e-06, "loss": 0.3791, "step": 6978 }, { "epoch": 0.6792214111922141, "grad_norm": 1.792751456995659, "learning_rate": 2.4645946122713534e-06, "loss": 0.3043, "step": 6979 }, { "epoch": 0.6793187347931874, "grad_norm": 1.387658716695058, "learning_rate": 2.4632362526391374e-06, "loss": 0.3579, "step": 6980 }, { "epoch": 0.6794160583941606, "grad_norm": 1.178665779721529, "learning_rate": 2.4618781450866963e-06, "loss": 0.2894, "step": 6981 }, { "epoch": 0.6795133819951338, "grad_norm": 1.6554744992194588, "learning_rate": 2.460520289748986e-06, "loss": 0.476, "step": 6982 }, { "epoch": 0.679610705596107, "grad_norm": 1.4270076128322857, "learning_rate": 2.459162686760934e-06, "loss": 0.4358, "step": 6983 }, { "epoch": 0.6797080291970803, "grad_norm": 1.4423335537670423, "learning_rate": 2.4578053362574466e-06, "loss": 0.2762, "step": 6984 }, { "epoch": 0.6798053527980535, "grad_norm": 1.4052268226638183, "learning_rate": 2.4564482383734083e-06, "loss": 0.3274, "step": 6985 }, { "epoch": 0.6799026763990268, "grad_norm": 1.4243597600104019, "learning_rate": 2.4550913932436694e-06, "loss": 0.4244, "step": 6986 }, { "epoch": 0.68, "grad_norm": 1.617023994178133, "learning_rate": 2.453734801003063e-06, "loss": 0.4276, "step": 6987 }, { "epoch": 0.6800973236009732, "grad_norm": 1.6336278750386801, "learning_rate": 2.452378461786395e-06, "loss": 0.4974, "step": 6988 }, { "epoch": 0.6801946472019464, "grad_norm": 1.1033290468998957, "learning_rate": 2.451022375728447e-06, "loss": 0.2591, "step": 6989 }, { "epoch": 0.6802919708029197, "grad_norm": 1.2920284482404487, "learning_rate": 2.4496665429639675e-06, "loss": 0.3202, "step": 6990 }, { "epoch": 0.680389294403893, "grad_norm": 1.2120326842505735, "learning_rate": 2.448310963627694e-06, "loss": 0.2522, "step": 6991 }, { "epoch": 0.6804866180048662, "grad_norm": 1.6963667865487155, "learning_rate": 2.4469556378543284e-06, "loss": 0.3964, "step": 6992 }, { "epoch": 0.6805839416058395, "grad_norm": 1.5656926915198155, "learning_rate": 2.4456005657785518e-06, "loss": 0.4468, "step": 6993 }, { "epoch": 0.6806812652068126, "grad_norm": 1.4530998966140611, "learning_rate": 2.4442457475350155e-06, "loss": 0.4109, "step": 6994 }, { "epoch": 0.6807785888077859, "grad_norm": 1.520292586600981, "learning_rate": 2.4428911832583504e-06, "loss": 0.4263, "step": 6995 }, { "epoch": 0.6808759124087591, "grad_norm": 1.427880818466438, "learning_rate": 2.4415368730831613e-06, "loss": 0.4779, "step": 6996 }, { "epoch": 0.6809732360097324, "grad_norm": 1.4461973214014017, "learning_rate": 2.4401828171440237e-06, "loss": 0.3459, "step": 6997 }, { "epoch": 0.6810705596107056, "grad_norm": 1.8635765497458632, "learning_rate": 2.4388290155754934e-06, "loss": 0.4807, "step": 6998 }, { "epoch": 0.6811678832116789, "grad_norm": 1.5011692258527074, "learning_rate": 2.4374754685120982e-06, "loss": 0.2618, "step": 6999 }, { "epoch": 0.681265206812652, "grad_norm": 1.5935035963876705, "learning_rate": 2.4361221760883407e-06, "loss": 0.4121, "step": 7000 }, { "epoch": 0.6813625304136253, "grad_norm": 1.3657851083856138, "learning_rate": 2.434769138438698e-06, "loss": 0.3803, "step": 7001 }, { "epoch": 0.6814598540145985, "grad_norm": 1.3890571757194168, "learning_rate": 2.433416355697623e-06, "loss": 0.2878, "step": 7002 }, { "epoch": 0.6815571776155718, "grad_norm": 1.4534767299451985, "learning_rate": 2.4320638279995443e-06, "loss": 0.4363, "step": 7003 }, { "epoch": 0.681654501216545, "grad_norm": 1.5975253697441922, "learning_rate": 2.4307115554788595e-06, "loss": 0.407, "step": 7004 }, { "epoch": 0.6817518248175183, "grad_norm": 1.4143137840276294, "learning_rate": 2.4293595382699464e-06, "loss": 0.234, "step": 7005 }, { "epoch": 0.6818491484184915, "grad_norm": 1.4106838686942245, "learning_rate": 2.4280077765071565e-06, "loss": 0.4045, "step": 7006 }, { "epoch": 0.6819464720194647, "grad_norm": 1.725546559530535, "learning_rate": 2.426656270324816e-06, "loss": 0.3169, "step": 7007 }, { "epoch": 0.6820437956204379, "grad_norm": 1.6297421586113854, "learning_rate": 2.425305019857222e-06, "loss": 0.6467, "step": 7008 }, { "epoch": 0.6821411192214112, "grad_norm": 1.650304378987235, "learning_rate": 2.4239540252386507e-06, "loss": 0.4968, "step": 7009 }, { "epoch": 0.6822384428223844, "grad_norm": 1.5135282977520814, "learning_rate": 2.422603286603351e-06, "loss": 0.5243, "step": 7010 }, { "epoch": 0.6823357664233577, "grad_norm": 1.4455538161613704, "learning_rate": 2.4212528040855477e-06, "loss": 0.3628, "step": 7011 }, { "epoch": 0.682433090024331, "grad_norm": 1.4059383470846873, "learning_rate": 2.4199025778194373e-06, "loss": 0.3781, "step": 7012 }, { "epoch": 0.6825304136253041, "grad_norm": 1.8515411890791325, "learning_rate": 2.418552607939194e-06, "loss": 0.5707, "step": 7013 }, { "epoch": 0.6826277372262773, "grad_norm": 2.0140690675425676, "learning_rate": 2.4172028945789674e-06, "loss": 0.4568, "step": 7014 }, { "epoch": 0.6827250608272506, "grad_norm": 1.2638049284093802, "learning_rate": 2.415853437872874e-06, "loss": 0.333, "step": 7015 }, { "epoch": 0.6828223844282238, "grad_norm": 1.2303873671003442, "learning_rate": 2.4145042379550126e-06, "loss": 0.249, "step": 7016 }, { "epoch": 0.6829197080291971, "grad_norm": 1.4612765811170028, "learning_rate": 2.4131552949594544e-06, "loss": 0.2459, "step": 7017 }, { "epoch": 0.6830170316301704, "grad_norm": 1.3299067942865632, "learning_rate": 2.4118066090202467e-06, "loss": 0.4052, "step": 7018 }, { "epoch": 0.6831143552311435, "grad_norm": 1.7221463026041783, "learning_rate": 2.410458180271405e-06, "loss": 0.4442, "step": 7019 }, { "epoch": 0.6832116788321168, "grad_norm": 1.2746340506968268, "learning_rate": 2.409110008846926e-06, "loss": 0.2624, "step": 7020 }, { "epoch": 0.68330900243309, "grad_norm": 1.5201577902758732, "learning_rate": 2.4077620948807775e-06, "loss": 0.5161, "step": 7021 }, { "epoch": 0.6834063260340633, "grad_norm": 1.6101704085540542, "learning_rate": 2.4064144385069027e-06, "loss": 0.4633, "step": 7022 }, { "epoch": 0.6835036496350365, "grad_norm": 1.811203591625575, "learning_rate": 2.4050670398592197e-06, "loss": 0.3546, "step": 7023 }, { "epoch": 0.6836009732360098, "grad_norm": 1.483058305310578, "learning_rate": 2.40371989907162e-06, "loss": 0.3693, "step": 7024 }, { "epoch": 0.683698296836983, "grad_norm": 1.350438527125783, "learning_rate": 2.4023730162779712e-06, "loss": 0.2979, "step": 7025 }, { "epoch": 0.6837956204379562, "grad_norm": 2.122168393420503, "learning_rate": 2.4010263916121114e-06, "loss": 0.4741, "step": 7026 }, { "epoch": 0.6838929440389294, "grad_norm": 1.4059707364310394, "learning_rate": 2.3996800252078555e-06, "loss": 0.2625, "step": 7027 }, { "epoch": 0.6839902676399027, "grad_norm": 1.6011541078488662, "learning_rate": 2.3983339171989944e-06, "loss": 0.5337, "step": 7028 }, { "epoch": 0.6840875912408759, "grad_norm": 1.6664607715584878, "learning_rate": 2.3969880677192924e-06, "loss": 0.5497, "step": 7029 }, { "epoch": 0.6841849148418492, "grad_norm": 1.4867998094055201, "learning_rate": 2.3956424769024843e-06, "loss": 0.4703, "step": 7030 }, { "epoch": 0.6842822384428224, "grad_norm": 1.6682610356191527, "learning_rate": 2.3942971448822842e-06, "loss": 0.2932, "step": 7031 }, { "epoch": 0.6843795620437956, "grad_norm": 1.5716231460991188, "learning_rate": 2.3929520717923787e-06, "loss": 0.3262, "step": 7032 }, { "epoch": 0.6844768856447688, "grad_norm": 1.496820099208088, "learning_rate": 2.391607257766428e-06, "loss": 0.3892, "step": 7033 }, { "epoch": 0.6845742092457421, "grad_norm": 1.7206930101770672, "learning_rate": 2.390262702938067e-06, "loss": 0.3531, "step": 7034 }, { "epoch": 0.6846715328467153, "grad_norm": 1.5024064946077917, "learning_rate": 2.388918407440906e-06, "loss": 0.2519, "step": 7035 }, { "epoch": 0.6847688564476886, "grad_norm": 1.7563921127653073, "learning_rate": 2.387574371408529e-06, "loss": 0.469, "step": 7036 }, { "epoch": 0.6848661800486618, "grad_norm": 1.6396186285166314, "learning_rate": 2.3862305949744906e-06, "loss": 0.6244, "step": 7037 }, { "epoch": 0.684963503649635, "grad_norm": 1.253278248232481, "learning_rate": 2.384887078272325e-06, "loss": 0.3874, "step": 7038 }, { "epoch": 0.6850608272506082, "grad_norm": 1.6037098700719943, "learning_rate": 2.3835438214355394e-06, "loss": 0.6085, "step": 7039 }, { "epoch": 0.6851581508515815, "grad_norm": 1.4916227380404068, "learning_rate": 2.382200824597611e-06, "loss": 0.304, "step": 7040 }, { "epoch": 0.6852554744525547, "grad_norm": 1.456903692394162, "learning_rate": 2.3808580878919948e-06, "loss": 0.4673, "step": 7041 }, { "epoch": 0.685352798053528, "grad_norm": 1.4472802865422167, "learning_rate": 2.3795156114521206e-06, "loss": 0.4213, "step": 7042 }, { "epoch": 0.6854501216545013, "grad_norm": 1.4508606691248402, "learning_rate": 2.3781733954113913e-06, "loss": 0.4607, "step": 7043 }, { "epoch": 0.6855474452554745, "grad_norm": 1.2246391621302561, "learning_rate": 2.376831439903183e-06, "loss": 0.2983, "step": 7044 }, { "epoch": 0.6856447688564477, "grad_norm": 1.2407074754334955, "learning_rate": 2.375489745060846e-06, "loss": 0.3823, "step": 7045 }, { "epoch": 0.6857420924574209, "grad_norm": 1.2471204970141723, "learning_rate": 2.374148311017707e-06, "loss": 0.3476, "step": 7046 }, { "epoch": 0.6858394160583942, "grad_norm": 1.7914390271495106, "learning_rate": 2.372807137907066e-06, "loss": 0.4304, "step": 7047 }, { "epoch": 0.6859367396593674, "grad_norm": 1.6806619346024443, "learning_rate": 2.371466225862193e-06, "loss": 0.3429, "step": 7048 }, { "epoch": 0.6860340632603407, "grad_norm": 1.2367672494426036, "learning_rate": 2.370125575016336e-06, "loss": 0.2466, "step": 7049 }, { "epoch": 0.6861313868613139, "grad_norm": 1.4040122559466237, "learning_rate": 2.3687851855027196e-06, "loss": 0.3746, "step": 7050 }, { "epoch": 0.6862287104622871, "grad_norm": 1.74319004926358, "learning_rate": 2.3674450574545342e-06, "loss": 0.513, "step": 7051 }, { "epoch": 0.6863260340632603, "grad_norm": 1.5251869524818102, "learning_rate": 2.366105191004952e-06, "loss": 0.4837, "step": 7052 }, { "epoch": 0.6864233576642336, "grad_norm": 1.2669444800107412, "learning_rate": 2.3647655862871155e-06, "loss": 0.2992, "step": 7053 }, { "epoch": 0.6865206812652068, "grad_norm": 1.7047724324160347, "learning_rate": 2.3634262434341426e-06, "loss": 0.2762, "step": 7054 }, { "epoch": 0.6866180048661801, "grad_norm": 1.6635440502336785, "learning_rate": 2.362087162579125e-06, "loss": 0.4159, "step": 7055 }, { "epoch": 0.6867153284671533, "grad_norm": 1.370775147723081, "learning_rate": 2.3607483438551266e-06, "loss": 0.2887, "step": 7056 }, { "epoch": 0.6868126520681265, "grad_norm": 1.8890066234821863, "learning_rate": 2.3594097873951894e-06, "loss": 0.5529, "step": 7057 }, { "epoch": 0.6869099756690997, "grad_norm": 1.5276484270920698, "learning_rate": 2.3580714933323234e-06, "loss": 0.3333, "step": 7058 }, { "epoch": 0.687007299270073, "grad_norm": 1.5421443233610488, "learning_rate": 2.3567334617995165e-06, "loss": 0.4768, "step": 7059 }, { "epoch": 0.6871046228710462, "grad_norm": 1.5257211637214352, "learning_rate": 2.35539569292973e-06, "loss": 0.5934, "step": 7060 }, { "epoch": 0.6872019464720195, "grad_norm": 1.9726873716411073, "learning_rate": 2.3540581868559016e-06, "loss": 0.31, "step": 7061 }, { "epoch": 0.6872992700729927, "grad_norm": 1.6297929943608709, "learning_rate": 2.352720943710935e-06, "loss": 0.32, "step": 7062 }, { "epoch": 0.6873965936739659, "grad_norm": 1.344617164610974, "learning_rate": 2.351383963627716e-06, "loss": 0.2451, "step": 7063 }, { "epoch": 0.6874939172749391, "grad_norm": 1.5779151399274474, "learning_rate": 2.3500472467390994e-06, "loss": 0.4428, "step": 7064 }, { "epoch": 0.6875912408759124, "grad_norm": 1.7882624673919416, "learning_rate": 2.348710793177918e-06, "loss": 0.4275, "step": 7065 }, { "epoch": 0.6876885644768856, "grad_norm": 1.5227467853998835, "learning_rate": 2.3473746030769738e-06, "loss": 0.2117, "step": 7066 }, { "epoch": 0.6877858880778589, "grad_norm": 1.6059188026338929, "learning_rate": 2.346038676569046e-06, "loss": 0.4718, "step": 7067 }, { "epoch": 0.6878832116788322, "grad_norm": 1.4027488357193745, "learning_rate": 2.344703013786888e-06, "loss": 0.2858, "step": 7068 }, { "epoch": 0.6879805352798054, "grad_norm": 1.3051811700517728, "learning_rate": 2.3433676148632218e-06, "loss": 0.252, "step": 7069 }, { "epoch": 0.6880778588807785, "grad_norm": 1.6732824942982871, "learning_rate": 2.3420324799307486e-06, "loss": 0.4898, "step": 7070 }, { "epoch": 0.6881751824817518, "grad_norm": 1.751569725110196, "learning_rate": 2.3406976091221413e-06, "loss": 0.659, "step": 7071 }, { "epoch": 0.688272506082725, "grad_norm": 1.605504687572991, "learning_rate": 2.339363002570049e-06, "loss": 0.3602, "step": 7072 }, { "epoch": 0.6883698296836983, "grad_norm": 1.4018499413477323, "learning_rate": 2.3380286604070888e-06, "loss": 0.3024, "step": 7073 }, { "epoch": 0.6884671532846716, "grad_norm": 1.5688080674325353, "learning_rate": 2.336694582765857e-06, "loss": 0.3225, "step": 7074 }, { "epoch": 0.6885644768856448, "grad_norm": 1.4753661784155816, "learning_rate": 2.3353607697789218e-06, "loss": 0.3374, "step": 7075 }, { "epoch": 0.688661800486618, "grad_norm": 1.1487316351783057, "learning_rate": 2.334027221578824e-06, "loss": 0.2517, "step": 7076 }, { "epoch": 0.6887591240875912, "grad_norm": 1.4174448942476598, "learning_rate": 2.33269393829808e-06, "loss": 0.2702, "step": 7077 }, { "epoch": 0.6888564476885645, "grad_norm": 1.488949282846272, "learning_rate": 2.331360920069179e-06, "loss": 0.4764, "step": 7078 }, { "epoch": 0.6889537712895377, "grad_norm": 1.9664262651615338, "learning_rate": 2.3300281670245855e-06, "loss": 0.4146, "step": 7079 }, { "epoch": 0.689051094890511, "grad_norm": 1.6565485271389164, "learning_rate": 2.328695679296732e-06, "loss": 0.4088, "step": 7080 }, { "epoch": 0.6891484184914842, "grad_norm": 1.3812187191282164, "learning_rate": 2.327363457018031e-06, "loss": 0.4236, "step": 7081 }, { "epoch": 0.6892457420924574, "grad_norm": 1.779570607674496, "learning_rate": 2.326031500320866e-06, "loss": 0.3915, "step": 7082 }, { "epoch": 0.6893430656934306, "grad_norm": 1.5587790625217628, "learning_rate": 2.324699809337596e-06, "loss": 0.4618, "step": 7083 }, { "epoch": 0.6894403892944039, "grad_norm": 1.564654886139345, "learning_rate": 2.3233683842005488e-06, "loss": 0.514, "step": 7084 }, { "epoch": 0.6895377128953771, "grad_norm": 1.6886866600418275, "learning_rate": 2.3220372250420304e-06, "loss": 0.4877, "step": 7085 }, { "epoch": 0.6896350364963504, "grad_norm": 1.1663450865093852, "learning_rate": 2.320706331994319e-06, "loss": 0.1601, "step": 7086 }, { "epoch": 0.6897323600973236, "grad_norm": 1.6581865181782154, "learning_rate": 2.319375705189666e-06, "loss": 0.4446, "step": 7087 }, { "epoch": 0.6898296836982969, "grad_norm": 1.451546334463058, "learning_rate": 2.318045344760297e-06, "loss": 0.4061, "step": 7088 }, { "epoch": 0.68992700729927, "grad_norm": 1.7332038017926754, "learning_rate": 2.3167152508384104e-06, "loss": 0.3531, "step": 7089 }, { "epoch": 0.6900243309002433, "grad_norm": 1.527844437194958, "learning_rate": 2.3153854235561805e-06, "loss": 0.3408, "step": 7090 }, { "epoch": 0.6901216545012165, "grad_norm": 1.7059815275929582, "learning_rate": 2.314055863045749e-06, "loss": 0.3583, "step": 7091 }, { "epoch": 0.6902189781021898, "grad_norm": 1.3737638291972332, "learning_rate": 2.3127265694392383e-06, "loss": 0.3684, "step": 7092 }, { "epoch": 0.690316301703163, "grad_norm": 1.1311901182058706, "learning_rate": 2.3113975428687392e-06, "loss": 0.2227, "step": 7093 }, { "epoch": 0.6904136253041363, "grad_norm": 1.6094495144972942, "learning_rate": 2.3100687834663205e-06, "loss": 0.4757, "step": 7094 }, { "epoch": 0.6905109489051094, "grad_norm": 1.4838140130314696, "learning_rate": 2.308740291364019e-06, "loss": 0.4687, "step": 7095 }, { "epoch": 0.6906082725060827, "grad_norm": 1.51742116933878, "learning_rate": 2.307412066693849e-06, "loss": 0.522, "step": 7096 }, { "epoch": 0.690705596107056, "grad_norm": 1.368798924795383, "learning_rate": 2.306084109587796e-06, "loss": 0.2961, "step": 7097 }, { "epoch": 0.6908029197080292, "grad_norm": 1.100628720850723, "learning_rate": 2.3047564201778217e-06, "loss": 0.2135, "step": 7098 }, { "epoch": 0.6909002433090025, "grad_norm": 2.5028431937357163, "learning_rate": 2.303428998595858e-06, "loss": 0.5036, "step": 7099 }, { "epoch": 0.6909975669099757, "grad_norm": 1.588738434492104, "learning_rate": 2.3021018449738125e-06, "loss": 0.3995, "step": 7100 }, { "epoch": 0.6910948905109489, "grad_norm": 1.3040063416203949, "learning_rate": 2.300774959443566e-06, "loss": 0.2657, "step": 7101 }, { "epoch": 0.6911922141119221, "grad_norm": 1.2538338993863447, "learning_rate": 2.29944834213697e-06, "loss": 0.286, "step": 7102 }, { "epoch": 0.6912895377128954, "grad_norm": 1.4370473895504778, "learning_rate": 2.2981219931858523e-06, "loss": 0.4474, "step": 7103 }, { "epoch": 0.6913868613138686, "grad_norm": 1.495019352637735, "learning_rate": 2.296795912722014e-06, "loss": 0.3265, "step": 7104 }, { "epoch": 0.6914841849148419, "grad_norm": 1.3513107947436753, "learning_rate": 2.2954701008772257e-06, "loss": 0.3779, "step": 7105 }, { "epoch": 0.6915815085158151, "grad_norm": 1.3609525800147586, "learning_rate": 2.294144557783236e-06, "loss": 0.3096, "step": 7106 }, { "epoch": 0.6916788321167883, "grad_norm": 1.5810258849773795, "learning_rate": 2.2928192835717642e-06, "loss": 0.3473, "step": 7107 }, { "epoch": 0.6917761557177615, "grad_norm": 1.3028614262475204, "learning_rate": 2.291494278374505e-06, "loss": 0.3806, "step": 7108 }, { "epoch": 0.6918734793187348, "grad_norm": 1.4181698322901153, "learning_rate": 2.2901695423231235e-06, "loss": 0.3953, "step": 7109 }, { "epoch": 0.691970802919708, "grad_norm": 1.6141866955608357, "learning_rate": 2.2888450755492604e-06, "loss": 0.4435, "step": 7110 }, { "epoch": 0.6920681265206813, "grad_norm": 1.3275763332022836, "learning_rate": 2.287520878184528e-06, "loss": 0.2836, "step": 7111 }, { "epoch": 0.6921654501216545, "grad_norm": 1.429484201787624, "learning_rate": 2.286196950360516e-06, "loss": 0.3437, "step": 7112 }, { "epoch": 0.6922627737226278, "grad_norm": 1.3502231201592745, "learning_rate": 2.2848732922087784e-06, "loss": 0.2494, "step": 7113 }, { "epoch": 0.6923600973236009, "grad_norm": 1.4890264612822304, "learning_rate": 2.283549903860851e-06, "loss": 0.4353, "step": 7114 }, { "epoch": 0.6924574209245742, "grad_norm": 1.4526377035185738, "learning_rate": 2.282226785448242e-06, "loss": 0.3106, "step": 7115 }, { "epoch": 0.6925547445255474, "grad_norm": 1.5784710042113108, "learning_rate": 2.2809039371024243e-06, "loss": 0.3609, "step": 7116 }, { "epoch": 0.6926520681265207, "grad_norm": 1.3720563243003592, "learning_rate": 2.2795813589548544e-06, "loss": 0.4049, "step": 7117 }, { "epoch": 0.692749391727494, "grad_norm": 2.34320936961282, "learning_rate": 2.278259051136955e-06, "loss": 0.4059, "step": 7118 }, { "epoch": 0.6928467153284672, "grad_norm": 1.7331830301897417, "learning_rate": 2.2769370137801305e-06, "loss": 0.2635, "step": 7119 }, { "epoch": 0.6929440389294403, "grad_norm": 1.7928356378879804, "learning_rate": 2.2756152470157474e-06, "loss": 0.4523, "step": 7120 }, { "epoch": 0.6930413625304136, "grad_norm": 1.8692269778658488, "learning_rate": 2.2742937509751505e-06, "loss": 0.4466, "step": 7121 }, { "epoch": 0.6931386861313868, "grad_norm": 1.6143047789787386, "learning_rate": 2.2729725257896616e-06, "loss": 0.4384, "step": 7122 }, { "epoch": 0.6932360097323601, "grad_norm": 1.4067371796084285, "learning_rate": 2.271651571590567e-06, "loss": 0.3634, "step": 7123 }, { "epoch": 0.6933333333333334, "grad_norm": 1.4539553823066247, "learning_rate": 2.2703308885091324e-06, "loss": 0.4489, "step": 7124 }, { "epoch": 0.6934306569343066, "grad_norm": 1.6923470292487963, "learning_rate": 2.2690104766765956e-06, "loss": 0.3835, "step": 7125 }, { "epoch": 0.6935279805352798, "grad_norm": 1.3523041559039422, "learning_rate": 2.267690336224168e-06, "loss": 0.3914, "step": 7126 }, { "epoch": 0.693625304136253, "grad_norm": 1.6374338267692956, "learning_rate": 2.266370467283029e-06, "loss": 0.5484, "step": 7127 }, { "epoch": 0.6937226277372263, "grad_norm": 1.48241198389425, "learning_rate": 2.265050869984337e-06, "loss": 0.3439, "step": 7128 }, { "epoch": 0.6938199513381995, "grad_norm": 1.7498954332991097, "learning_rate": 2.2637315444592194e-06, "loss": 0.3914, "step": 7129 }, { "epoch": 0.6939172749391728, "grad_norm": 1.4413933482019134, "learning_rate": 2.262412490838784e-06, "loss": 0.4159, "step": 7130 }, { "epoch": 0.694014598540146, "grad_norm": 1.650525980257544, "learning_rate": 2.2610937092540995e-06, "loss": 0.2635, "step": 7131 }, { "epoch": 0.6941119221411193, "grad_norm": 1.4291678563030545, "learning_rate": 2.259775199836217e-06, "loss": 0.2207, "step": 7132 }, { "epoch": 0.6942092457420924, "grad_norm": 1.5482230429622463, "learning_rate": 2.2584569627161596e-06, "loss": 0.4786, "step": 7133 }, { "epoch": 0.6943065693430657, "grad_norm": 1.3125996324779596, "learning_rate": 2.2571389980249165e-06, "loss": 0.3913, "step": 7134 }, { "epoch": 0.6944038929440389, "grad_norm": 1.5645298552061642, "learning_rate": 2.255821305893457e-06, "loss": 0.4752, "step": 7135 }, { "epoch": 0.6945012165450122, "grad_norm": 1.838801580829783, "learning_rate": 2.254503886452721e-06, "loss": 0.4698, "step": 7136 }, { "epoch": 0.6945985401459854, "grad_norm": 1.3377697966651918, "learning_rate": 2.2531867398336233e-06, "loss": 0.3974, "step": 7137 }, { "epoch": 0.6946958637469587, "grad_norm": 1.6965012843101581, "learning_rate": 2.2518698661670456e-06, "loss": 0.481, "step": 7138 }, { "epoch": 0.6947931873479318, "grad_norm": 1.4329440024296556, "learning_rate": 2.2505532655838466e-06, "loss": 0.3174, "step": 7139 }, { "epoch": 0.6948905109489051, "grad_norm": 1.4171523589263946, "learning_rate": 2.2492369382148634e-06, "loss": 0.3386, "step": 7140 }, { "epoch": 0.6949878345498783, "grad_norm": 1.3472101772508374, "learning_rate": 2.2479208841908946e-06, "loss": 0.4004, "step": 7141 }, { "epoch": 0.6950851581508516, "grad_norm": 1.63526156068584, "learning_rate": 2.246605103642719e-06, "loss": 0.6597, "step": 7142 }, { "epoch": 0.6951824817518248, "grad_norm": 1.6193031809232163, "learning_rate": 2.245289596701086e-06, "loss": 0.2818, "step": 7143 }, { "epoch": 0.6952798053527981, "grad_norm": 1.523570676019948, "learning_rate": 2.2439743634967216e-06, "loss": 0.3612, "step": 7144 }, { "epoch": 0.6953771289537712, "grad_norm": 1.2057833347003468, "learning_rate": 2.2426594041603165e-06, "loss": 0.2034, "step": 7145 }, { "epoch": 0.6954744525547445, "grad_norm": 1.4226443546306904, "learning_rate": 2.2413447188225417e-06, "loss": 0.3774, "step": 7146 }, { "epoch": 0.6955717761557177, "grad_norm": 1.3792925069432436, "learning_rate": 2.240030307614037e-06, "loss": 0.3182, "step": 7147 }, { "epoch": 0.695669099756691, "grad_norm": 1.2143486461440078, "learning_rate": 2.2387161706654196e-06, "loss": 0.228, "step": 7148 }, { "epoch": 0.6957664233576643, "grad_norm": 1.5365330392380654, "learning_rate": 2.23740230810727e-06, "loss": 0.4679, "step": 7149 }, { "epoch": 0.6958637469586375, "grad_norm": 1.3932926436845328, "learning_rate": 2.236088720070152e-06, "loss": 0.381, "step": 7150 }, { "epoch": 0.6959610705596107, "grad_norm": 1.5468794697398334, "learning_rate": 2.234775406684599e-06, "loss": 0.5677, "step": 7151 }, { "epoch": 0.6960583941605839, "grad_norm": 1.1817886133763231, "learning_rate": 2.233462368081112e-06, "loss": 0.2683, "step": 7152 }, { "epoch": 0.6961557177615572, "grad_norm": 1.5161720153931184, "learning_rate": 2.2321496043901698e-06, "loss": 0.4507, "step": 7153 }, { "epoch": 0.6962530413625304, "grad_norm": 1.3402527832617142, "learning_rate": 2.230837115742222e-06, "loss": 0.3581, "step": 7154 }, { "epoch": 0.6963503649635037, "grad_norm": 1.4744590025771356, "learning_rate": 2.2295249022676945e-06, "loss": 0.4013, "step": 7155 }, { "epoch": 0.6964476885644769, "grad_norm": 1.326370965904232, "learning_rate": 2.2282129640969786e-06, "loss": 0.4208, "step": 7156 }, { "epoch": 0.6965450121654502, "grad_norm": 1.5182291841152533, "learning_rate": 2.226901301360444e-06, "loss": 0.3771, "step": 7157 }, { "epoch": 0.6966423357664233, "grad_norm": 1.5146032605790822, "learning_rate": 2.225589914188433e-06, "loss": 0.3303, "step": 7158 }, { "epoch": 0.6967396593673966, "grad_norm": 1.6804953827957696, "learning_rate": 2.2242788027112544e-06, "loss": 0.4242, "step": 7159 }, { "epoch": 0.6968369829683698, "grad_norm": 1.9117629581978521, "learning_rate": 2.222967967059199e-06, "loss": 0.3716, "step": 7160 }, { "epoch": 0.6969343065693431, "grad_norm": 1.229105360582037, "learning_rate": 2.221657407362523e-06, "loss": 0.3342, "step": 7161 }, { "epoch": 0.6970316301703163, "grad_norm": 1.4308496140072384, "learning_rate": 2.2203471237514606e-06, "loss": 0.4376, "step": 7162 }, { "epoch": 0.6971289537712896, "grad_norm": 1.4614854080980406, "learning_rate": 2.2190371163562115e-06, "loss": 0.4659, "step": 7163 }, { "epoch": 0.6972262773722627, "grad_norm": 1.4187512301132987, "learning_rate": 2.2177273853069525e-06, "loss": 0.2365, "step": 7164 }, { "epoch": 0.697323600973236, "grad_norm": 1.3089222054558105, "learning_rate": 2.216417930733834e-06, "loss": 0.3082, "step": 7165 }, { "epoch": 0.6974209245742092, "grad_norm": 1.261902408991811, "learning_rate": 2.2151087527669783e-06, "loss": 0.282, "step": 7166 }, { "epoch": 0.6975182481751825, "grad_norm": 1.7640787291139268, "learning_rate": 2.2137998515364754e-06, "loss": 0.5948, "step": 7167 }, { "epoch": 0.6976155717761557, "grad_norm": 1.632814836272252, "learning_rate": 2.2124912271723945e-06, "loss": 0.5355, "step": 7168 }, { "epoch": 0.697712895377129, "grad_norm": 1.4433366621447463, "learning_rate": 2.2111828798047736e-06, "loss": 0.4636, "step": 7169 }, { "epoch": 0.6978102189781021, "grad_norm": 1.7838258021714664, "learning_rate": 2.2098748095636236e-06, "loss": 0.3765, "step": 7170 }, { "epoch": 0.6979075425790754, "grad_norm": 1.447955215636187, "learning_rate": 2.208567016578929e-06, "loss": 0.3262, "step": 7171 }, { "epoch": 0.6980048661800486, "grad_norm": 1.5231870005139048, "learning_rate": 2.2072595009806457e-06, "loss": 0.2944, "step": 7172 }, { "epoch": 0.6981021897810219, "grad_norm": 1.5448173940946588, "learning_rate": 2.205952262898704e-06, "loss": 0.2999, "step": 7173 }, { "epoch": 0.6981995133819952, "grad_norm": 1.5943167183757534, "learning_rate": 2.2046453024630016e-06, "loss": 0.3786, "step": 7174 }, { "epoch": 0.6982968369829684, "grad_norm": 1.4530451762927175, "learning_rate": 2.203338619803414e-06, "loss": 0.2946, "step": 7175 }, { "epoch": 0.6983941605839417, "grad_norm": 1.3574104583040578, "learning_rate": 2.202032215049788e-06, "loss": 0.3585, "step": 7176 }, { "epoch": 0.6984914841849148, "grad_norm": 2.1649128589316424, "learning_rate": 2.2007260883319392e-06, "loss": 0.3878, "step": 7177 }, { "epoch": 0.698588807785888, "grad_norm": 1.5012934988614022, "learning_rate": 2.1994202397796594e-06, "loss": 0.3496, "step": 7178 }, { "epoch": 0.6986861313868613, "grad_norm": 1.8667082231300942, "learning_rate": 2.198114669522711e-06, "loss": 0.3446, "step": 7179 }, { "epoch": 0.6987834549878346, "grad_norm": 2.059006246104315, "learning_rate": 2.196809377690831e-06, "loss": 0.3747, "step": 7180 }, { "epoch": 0.6988807785888078, "grad_norm": 1.3436752121096145, "learning_rate": 2.1955043644137258e-06, "loss": 0.2313, "step": 7181 }, { "epoch": 0.6989781021897811, "grad_norm": 1.2639389109697716, "learning_rate": 2.194199629821076e-06, "loss": 0.3304, "step": 7182 }, { "epoch": 0.6990754257907542, "grad_norm": 1.4891998579912018, "learning_rate": 2.192895174042533e-06, "loss": 0.4796, "step": 7183 }, { "epoch": 0.6991727493917275, "grad_norm": 1.353731917016591, "learning_rate": 2.1915909972077244e-06, "loss": 0.2765, "step": 7184 }, { "epoch": 0.6992700729927007, "grad_norm": 1.4471954452240534, "learning_rate": 2.1902870994462423e-06, "loss": 0.3412, "step": 7185 }, { "epoch": 0.699367396593674, "grad_norm": 1.4493037196335952, "learning_rate": 2.1889834808876583e-06, "loss": 0.3935, "step": 7186 }, { "epoch": 0.6994647201946472, "grad_norm": 1.4823255344291792, "learning_rate": 2.187680141661515e-06, "loss": 0.3114, "step": 7187 }, { "epoch": 0.6995620437956205, "grad_norm": 1.3925930198953602, "learning_rate": 2.1863770818973235e-06, "loss": 0.2601, "step": 7188 }, { "epoch": 0.6996593673965936, "grad_norm": 1.4217463843895148, "learning_rate": 2.18507430172457e-06, "loss": 0.4411, "step": 7189 }, { "epoch": 0.6997566909975669, "grad_norm": 1.4457160180214488, "learning_rate": 2.183771801272714e-06, "loss": 0.239, "step": 7190 }, { "epoch": 0.6998540145985401, "grad_norm": 1.3826602485591162, "learning_rate": 2.1824695806711847e-06, "loss": 0.396, "step": 7191 }, { "epoch": 0.6999513381995134, "grad_norm": 1.63230939351161, "learning_rate": 2.1811676400493853e-06, "loss": 0.2797, "step": 7192 }, { "epoch": 0.7000486618004866, "grad_norm": 1.5764297595819967, "learning_rate": 2.17986597953669e-06, "loss": 0.5107, "step": 7193 }, { "epoch": 0.7001459854014599, "grad_norm": 1.4846905021661214, "learning_rate": 2.178564599262447e-06, "loss": 0.2824, "step": 7194 }, { "epoch": 0.7002433090024331, "grad_norm": 1.6519622001479857, "learning_rate": 2.1772634993559725e-06, "loss": 0.4299, "step": 7195 }, { "epoch": 0.7003406326034063, "grad_norm": 1.6158671863479424, "learning_rate": 2.1759626799465596e-06, "loss": 0.3323, "step": 7196 }, { "epoch": 0.7004379562043795, "grad_norm": 1.4162209407993407, "learning_rate": 2.1746621411634705e-06, "loss": 0.2818, "step": 7197 }, { "epoch": 0.7005352798053528, "grad_norm": 1.4777578531003996, "learning_rate": 2.1733618831359426e-06, "loss": 0.3929, "step": 7198 }, { "epoch": 0.700632603406326, "grad_norm": 1.3071594576708383, "learning_rate": 2.17206190599318e-06, "loss": 0.1265, "step": 7199 }, { "epoch": 0.7007299270072993, "grad_norm": 1.380455508381796, "learning_rate": 2.1707622098643646e-06, "loss": 0.35, "step": 7200 }, { "epoch": 0.7008272506082726, "grad_norm": 1.9651320380579909, "learning_rate": 2.169462794878647e-06, "loss": 0.4279, "step": 7201 }, { "epoch": 0.7009245742092457, "grad_norm": 1.6172999098773795, "learning_rate": 2.168163661165151e-06, "loss": 0.4402, "step": 7202 }, { "epoch": 0.701021897810219, "grad_norm": 1.3452355800352764, "learning_rate": 2.166864808852973e-06, "loss": 0.3383, "step": 7203 }, { "epoch": 0.7011192214111922, "grad_norm": 1.7373915549266996, "learning_rate": 2.16556623807118e-06, "loss": 0.3531, "step": 7204 }, { "epoch": 0.7012165450121655, "grad_norm": 1.4004273988372486, "learning_rate": 2.164267948948814e-06, "loss": 0.3775, "step": 7205 }, { "epoch": 0.7013138686131387, "grad_norm": 1.5907312863556262, "learning_rate": 2.1629699416148832e-06, "loss": 0.5438, "step": 7206 }, { "epoch": 0.701411192214112, "grad_norm": 1.3487333554433167, "learning_rate": 2.1616722161983734e-06, "loss": 0.3756, "step": 7207 }, { "epoch": 0.7015085158150851, "grad_norm": 1.6982980623535286, "learning_rate": 2.1603747728282395e-06, "loss": 0.7085, "step": 7208 }, { "epoch": 0.7016058394160584, "grad_norm": 1.5047427538652773, "learning_rate": 2.1590776116334117e-06, "loss": 0.3471, "step": 7209 }, { "epoch": 0.7017031630170316, "grad_norm": 1.385831092095565, "learning_rate": 2.157780732742786e-06, "loss": 0.4245, "step": 7210 } ], "logging_steps": 1.0, "max_steps": 10275, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 515, "total_flos": 922865691402240.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }